From 89f9f6796d41e10e224b0cb0027ddd78cb881f65 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 27 Feb 2015 11:25:59 -0800 Subject: hv: vmbus_post_msg: retry the hypercall on some transient errors I got HV_STATUS_INVALID_CONNECTION_ID on Hyper-V 2008 R2 while repeatedly running "rmmod hv_netvsc; modprobe hv_netvsc; rmmod hv_utils; modprobe hv_utils" in a Linux guest. It looks like the host has some kind of throttling mechanism that kicks in when certain kinds of hypercalls are sent too frequently. Without the patch, the driver can occasionally fail to load. Also let's retry HV_STATUS_INSUFFICIENT_MEMORY, though we didn't get it before. Removed 'case -ENOMEM', since the hypervisor doesn't return this. CC: "K. Y. Srinivasan" Reviewed-by: Jason Wang Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 90c458e66e13..ce6068dbcfbc 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -225,6 +225,8 @@ #define HV_STATUS_INVALID_HYPERCALL_CODE 2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 #define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 typedef struct _HV_REFERENCE_TSC_PAGE { -- cgit v1.2.3 From 1bd187de536494a27925902b9653e9ef0ace4774 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 23 Feb 2015 16:24:44 +0200 Subject: x86, intel-mid: remove Intel MID specific serial support Since we have a native 8250 driver carrying the Intel MID serial devices, the platform-specific support is not needed anymore. This patch removes it for Intel MID. Note that the console device name is changed from ttyMFDx to ttySx. Signed-off-by: Andy Shevchenko Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig.debug | 4 - arch/x86/include/asm/intel-mid.h | 3 - arch/x86/kernel/early_printk.c | 6 - arch/x86/platform/intel-mid/Makefile | 1 - .../platform/intel-mid/early_printk_intel_mid.c | 112 -- drivers/tty/serial/Kconfig | 10 - drivers/tty/serial/Makefile | 1 - drivers/tty/serial/mfd.c | 1505 -------------------- include/linux/serial_mfd.h | 47 - include/uapi/linux/serial_reg.h | 19 - 10 files changed, 1708 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/early_printk_intel_mid.c delete mode 100644 drivers/tty/serial/mfd.c delete mode 100644 include/linux/serial_mfd.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 20028da8ae18..72484a645f05 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -43,10 +43,6 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally say N here, unless you want to debug such a crash.
-config EARLY_PRINTK_INTEL_MID - bool "Early printk for Intel MID platform support" - depends on EARLY_PRINTK && X86_INTEL_MID - config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" depends on EARLY_PRINTK && PCI diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 705d35708a50..7c5af123bdbd 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options; #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -extern struct console early_hsu_console; -extern void hsu_early_console_init(const char *); - extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index a62536a1be88..f85e3fb50f28 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_INTEL_MID - if (!strncmp(buf, "hsu", 3)) { - hsu_early_console_init(buf + 3); - early_console_register(&early_hsu_console, keep); - } -#endif #ifdef CONFIG_EARLY_PRINTK_EFI if (!strncmp(buf, "efi", 3)) early_console_register(&early_efi_console, keep); diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0a8ee703b9fa..0ce1b1913673 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o -obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c deleted file mode 100644 index 4e720829ab90..000000000000 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * early_printk_intel_mid.c - early consoles for Intel MID platforms - * - * Copyright (c) 2008-2010, Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* - * This file implements early console named hsu. - * hsu is based on a High Speed UART device which only exists in the Medfield - * platform - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Following is the early console based on Medfield HSU (High - * Speed UART) device. - */ -#define HSU_PORT_BASE 0xffa28080 - -static void __iomem *phsu; - -void hsu_early_console_init(const char *s) -{ - unsigned long paddr, port = 0; - u8 lcr; - - /* - * Select the early HSU console port if specified by user in the - * kernel command line. 
- */ - if (*s && !kstrtoul(s, 10, &port)) - port = clamp_val(port, 0, 2); - - paddr = HSU_PORT_BASE + port * 0x80; - phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); - - /* Disable FIFO */ - writeb(0x0, phsu + UART_FCR); - - /* Set to default 115200 bps, 8n1 */ - lcr = readb(phsu + UART_LCR); - writeb((0x80 | lcr), phsu + UART_LCR); - writeb(0x18, phsu + UART_DLL); - writeb(lcr, phsu + UART_LCR); - writel(0x3600, phsu + UART_MUL*4); - - writeb(0x8, phsu + UART_MCR); - writeb(0x7, phsu + UART_FCR); - writeb(0x3, phsu + UART_LCR); - - /* Clear IRQ status */ - readb(phsu + UART_LSR); - readb(phsu + UART_RX); - readb(phsu + UART_IIR); - readb(phsu + UART_MSR); - - /* Enable FIFO */ - writeb(0x7, phsu + UART_FCR); -} - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -static void early_hsu_putc(char ch) -{ - unsigned int timeout = 10000; /* 10ms */ - u8 status; - - while (--timeout) { - status = readb(phsu + UART_LSR); - if (status & BOTH_EMPTY) - break; - udelay(1); - } - - /* Only write the char when there was no timeout */ - if (timeout) - writeb(ch, phsu + UART_TX); -} - -static void early_hsu_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_hsu_putc('\r'); - early_hsu_putc(*str); - str++; - } -} - -struct console early_hsu_console = { - .name = "earlyhsu", - .write = early_hsu_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 60c368a5dbf1..c9dbc626038d 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -483,16 +483,6 @@ config SERIAL_SA1100_CONSOLE your boot loader (lilo or loadlin) about how to pass options to the kernel at boot time.) -config SERIAL_MFD_HSU - tristate "Medfield High Speed UART support" - depends on PCI - select SERIAL_CORE - -config SERIAL_MFD_HSU_CONSOLE - bool "Medfield HSU serial console support" - depends on SERIAL_MFD_HSU=y - select SERIAL_CORE_CONSOLE - config SERIAL_BFIN tristate "Blackfin serial port support" depends on BLACKFIN diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 599be4b05a26..f42b4f9845df 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -78,7 +78,6 @@ obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o -obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o obj-$(CONFIG_SERIAL_PCH_UART) += pch_uart.o obj-$(CONFIG_SERIAL_MSM_SMD) += msm_smd_tty.o diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c deleted file mode 100644 index 8fe4501d7565..000000000000 --- a/drivers/tty/serial/mfd.c +++ /dev/null @@ -1,1505 +0,0 @@ -/* - * mfd.c: driver for High Speed UART device of Intel Medfield platform - * - * Refer pxa.c, 8250.c and some other drivers in drivers/serial/ - * - * (C) Copyright 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* Notes: - * 1. DMA channel allocation: 0/1 channel are assigned to port 0, - * 2/3 chan to port 1, 4/5 chan to port 3. Even number chans - * are used for RX, odd chans for TX - * - * 2. 
The RI/DSR/DCD/DTR are not pinned out, DCD & DSR are always - * asserted, only when the HW is reset the DDCD and DDSR will - * be triggered - */ - -#if defined(CONFIG_SERIAL_MFD_HSU_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) -#define SUPPORT_SYSRQ -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HSU_DMA_BUF_SIZE 2048 - -#define chan_readl(chan, offset) readl(chan->reg + offset) -#define chan_writel(chan, offset, val) writel(val, chan->reg + offset) - -#define mfd_readl(obj, offset) readl(obj->reg + offset) -#define mfd_writel(obj, offset, val) writel(val, obj->reg + offset) - -static int hsu_dma_enable; -module_param(hsu_dma_enable, int, 0); -MODULE_PARM_DESC(hsu_dma_enable, - "It is a bitmap to set working mode, if bit[x] is 1, then port[x] will work in DMA mode, otherwise in PIO mode."); - -struct hsu_dma_buffer { - u8 *buf; - dma_addr_t dma_addr; - u32 dma_size; - u32 ofs; -}; - -struct hsu_dma_chan { - u32 id; - enum dma_data_direction dirt; - struct uart_hsu_port *uport; - void __iomem *reg; -}; - -struct uart_hsu_port { - struct uart_port port; - unsigned char ier; - unsigned char lcr; - unsigned char mcr; - unsigned int lsr_break_flag; - char name[12]; - int index; - struct device *dev; - - struct hsu_dma_chan *txc; - struct hsu_dma_chan *rxc; - struct hsu_dma_buffer txbuf; - struct hsu_dma_buffer rxbuf; - int use_dma; /* flag for DMA/PIO */ - int running; - int dma_tx_on; -}; - -/* Top level data structure of HSU */ -struct hsu_port { - void __iomem *reg; - unsigned long paddr; - unsigned long iolen; - u32 irq; - - struct uart_hsu_port port[3]; - struct hsu_dma_chan chans[10]; - - struct dentry *debugfs; -}; - -static inline unsigned int serial_in(struct uart_hsu_port *up, int offset) -{ - unsigned int val; - - if (offset > UART_MSR) { - offset <<= 2; - val = readl(up->port.membase + offset); - } else - val = (unsigned int)readb(up->port.membase + offset); - - return val; -} - -static inline void serial_out(struct uart_hsu_port *up, int offset, int value) -{ - if (offset > UART_MSR) { - offset <<= 2; - writel(value, up->port.membase + offset); - } else { - unsigned char val = value & 0xff; - writeb(val, up->port.membase + offset); - } -} - -#ifdef CONFIG_DEBUG_FS - -#define HSU_REGS_BUFSIZE 1024 - - -static ssize_t port_show_regs(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct uart_hsu_port *up = file->private_data; - char *buf; - u32 len = 0; - ssize_t ret; - - buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL); - if (!buf) - return 0; - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MFD HSU port[%d] regs:\n", up->index); - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "=================================\n"); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "IER: \t\t0x%08x\n", serial_in(up, UART_IER)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "IIR: \t\t0x%08x\n", serial_in(up, UART_IIR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "LCR: \t\t0x%08x\n", serial_in(up, UART_LCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MCR: \t\t0x%08x\n", serial_in(up, UART_MCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "LSR: \t\t0x%08x\n", serial_in(up, UART_LSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MSR: \t\t0x%08x\n", serial_in(up, UART_MSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "FOR: 
\t\t0x%08x\n", serial_in(up, UART_FOR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "PS: \t\t0x%08x\n", serial_in(up, UART_PS)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MUL: \t\t0x%08x\n", serial_in(up, UART_MUL)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "DIV: \t\t0x%08x\n", serial_in(up, UART_DIV)); - - if (len > HSU_REGS_BUFSIZE) - len = HSU_REGS_BUFSIZE; - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); - kfree(buf); - return ret; -} - -static ssize_t dma_show_regs(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hsu_dma_chan *chan = file->private_data; - char *buf; - u32 len = 0; - ssize_t ret; - - buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL); - if (!buf) - return 0; - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MFD HSU DMA channel [%d] regs:\n", chan->id); - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "=================================\n"); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "CR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_CR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "DCR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_DCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "BSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_BSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MOTSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_MOTSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3TSR)); - - if (len > HSU_REGS_BUFSIZE) - len = HSU_REGS_BUFSIZE; - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); - kfree(buf); - return ret; -} - -static const struct file_operations port_regs_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = port_show_regs, - .llseek = default_llseek, -}; - -static const struct file_operations dma_regs_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = dma_show_regs, - .llseek = default_llseek, -}; - -static int hsu_debugfs_init(struct hsu_port *hsu) -{ - int i; - char name[32]; - - hsu->debugfs = debugfs_create_dir("hsu", NULL); - if (!hsu->debugfs) - return -ENOMEM; - - for (i = 0; i < 3; i++) { - snprintf(name, sizeof(name), "port_%d_regs", i); - debugfs_create_file(name, S_IFREG | S_IRUGO, - hsu->debugfs, (void *)(&hsu->port[i]), &port_regs_ops); - } - - for (i = 0; i < 6; i++) { - snprintf(name, sizeof(name), "dma_chan_%d_regs", i); - debugfs_create_file(name, S_IFREG | S_IRUGO, - hsu->debugfs, (void *)&hsu->chans[i], &dma_regs_ops); - } - - return 0; -} - -static void hsu_debugfs_remove(struct hsu_port *hsu) -{ - if (hsu->debugfs) - debugfs_remove_recursive(hsu->debugfs); -} - -#else -static inline int hsu_debugfs_init(struct hsu_port *hsu) -{ - return 0; -} 
- -static inline void hsu_debugfs_remove(struct hsu_port *hsu) -{ -} -#endif /* CONFIG_DEBUG_FS */ - -static void serial_hsu_enable_ms(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - up->ier |= UART_IER_MSI; - serial_out(up, UART_IER, up->ier); -} - -static void hsu_dma_tx(struct uart_hsu_port *up) -{ - struct circ_buf *xmit = &up->port.state->xmit; - struct hsu_dma_buffer *dbuf = &up->txbuf; - int count; - - /* test_and_set_bit may be better, but anyway it's in lock protected mode */ - if (up->dma_tx_on) - return; - - /* Update the circ buf info */ - xmit->tail += dbuf->ofs; - xmit->tail &= UART_XMIT_SIZE - 1; - - up->port.icount.tx += dbuf->ofs; - dbuf->ofs = 0; - - /* Disable the channel */ - chan_writel(up->txc, HSU_CH_CR, 0x0); - - if (!uart_circ_empty(xmit) && !uart_tx_stopped(&up->port)) { - dma_sync_single_for_device(up->port.dev, - dbuf->dma_addr, - dbuf->dma_size, - DMA_TO_DEVICE); - - count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - dbuf->ofs = count; - - /* Reprogram the channel */ - chan_writel(up->txc, HSU_CH_D0SAR, dbuf->dma_addr + xmit->tail); - chan_writel(up->txc, HSU_CH_D0TSR, count); - - /* Reenable the channel */ - chan_writel(up->txc, HSU_CH_DCR, 0x1 - | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24)); - up->dma_tx_on = 1; - chan_writel(up->txc, HSU_CH_CR, 0x1); - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); -} - -/* The buffer is already cache coherent */ -static void hsu_dma_start_rx_chan(struct hsu_dma_chan *rxc, - struct hsu_dma_buffer *dbuf) -{ - dbuf->ofs = 0; - - chan_writel(rxc, HSU_CH_BSR, 32); - chan_writel(rxc, HSU_CH_MOTSR, 4); - - chan_writel(rxc, HSU_CH_D0SAR, dbuf->dma_addr); - chan_writel(rxc, HSU_CH_D0TSR, dbuf->dma_size); - chan_writel(rxc, HSU_CH_DCR, 0x1 | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24) /* timeout bit, see HSU Errata 1 */ - ); - chan_writel(rxc, HSU_CH_CR, 0x3); -} - -/* Protected by spin_lock_irqsave(port->lock) */ -static void serial_hsu_start_tx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - if (up->use_dma) { - hsu_dma_tx(up); - } else if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -static void serial_hsu_stop_tx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - struct hsu_dma_chan *txc = up->txc; - - if (up->use_dma) - chan_writel(txc, HSU_CH_CR, 0x0); - else if (up->ier & UART_IER_THRI) { - up->ier &= ~UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -/* This is always called in spinlock protected mode, so - * modify timeout timer is safe here */ -static void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts, - unsigned long *flags) -{ - struct hsu_dma_buffer *dbuf = &up->rxbuf; - struct hsu_dma_chan *chan = up->rxc; - struct uart_port *port = &up->port; - struct tty_port *tport = &port->state->port; - int count; - - /* - * First need to know how many is already transferred, - * then check if its a timeout DMA irq, and return - * the trail bytes out, push them up and reenable the - * channel - */ - - /* Timeout IRQ, need wait some time, see Errata 2 */ - if (int_sts & 0xf00) - udelay(2); - - /* Stop the channel */ - chan_writel(chan, HSU_CH_CR, 0x0); - - count = chan_readl(chan, HSU_CH_D0SAR) - dbuf->dma_addr; - if (!count) { - /* Restart the channel before we leave */ - chan_writel(chan, HSU_CH_CR, 0x3); - return; - } - - 
dma_sync_single_for_cpu(port->dev, dbuf->dma_addr, - dbuf->dma_size, DMA_FROM_DEVICE); - - /* - * Head will only wrap around when we recycle - * the DMA buffer, and when that happens, we - * explicitly set tail to 0. So head will - * always be greater than tail. - */ - tty_insert_flip_string(tport, dbuf->buf, count); - port->icount.rx += count; - - dma_sync_single_for_device(up->port.dev, dbuf->dma_addr, - dbuf->dma_size, DMA_FROM_DEVICE); - - /* Reprogram the channel */ - chan_writel(chan, HSU_CH_D0SAR, dbuf->dma_addr); - chan_writel(chan, HSU_CH_D0TSR, dbuf->dma_size); - chan_writel(chan, HSU_CH_DCR, 0x1 - | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24) /* timeout bit, see HSU Errata 1 */ - ); - spin_unlock_irqrestore(&up->port.lock, *flags); - tty_flip_buffer_push(tport); - spin_lock_irqsave(&up->port.lock, *flags); - - chan_writel(chan, HSU_CH_CR, 0x3); - -} - -static void serial_hsu_stop_rx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - struct hsu_dma_chan *chan = up->rxc; - - if (up->use_dma) - chan_writel(chan, HSU_CH_CR, 0x2); - else { - up->ier &= ~UART_IER_RLSI; - up->port.read_status_mask &= ~UART_LSR_DR; - serial_out(up, UART_IER, up->ier); - } -} - -static inline void receive_chars(struct uart_hsu_port *up, int *status, - unsigned long *flags) -{ - unsigned int ch, flag; - unsigned int max_count = 256; - - do { - ch = serial_in(up, UART_RX); - flag = TTY_NORMAL; - up->port.icount.rx++; - - if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { - - dev_warn(up->dev, "We really rush into ERR/BI case" - "status = 0x%02x", *status); - /* For statistics only */ - if (*status & UART_LSR_BI) { - *status &= ~(UART_LSR_FE | UART_LSR_PE); - up->port.icount.brk++; - /* - * We do the SysRQ and SAK checking - * here because otherwise the break - * may get masked by ignore_status_mask - * or read_status_mask. - */ - if (uart_handle_break(&up->port)) - goto ignore_char; - } else if (*status & UART_LSR_PE) - up->port.icount.parity++; - else if (*status & UART_LSR_FE) - up->port.icount.frame++; - if (*status & UART_LSR_OE) - up->port.icount.overrun++; - - /* Mask off conditions which should be ignored. 
*/ - *status &= up->port.read_status_mask; - -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - if (up->port.cons && - up->port.cons->index == up->port.line) { - /* Recover the break flag from console xmit */ - *status |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif - if (*status & UART_LSR_BI) { - flag = TTY_BREAK; - } else if (*status & UART_LSR_PE) - flag = TTY_PARITY; - else if (*status & UART_LSR_FE) - flag = TTY_FRAME; - } - - if (uart_handle_sysrq_char(&up->port, ch)) - goto ignore_char; - - uart_insert_char(&up->port, *status, UART_LSR_OE, ch, flag); - ignore_char: - *status = serial_in(up, UART_LSR); - } while ((*status & UART_LSR_DR) && max_count--); - - spin_unlock_irqrestore(&up->port.lock, *flags); - tty_flip_buffer_push(&up->port.state->port); - spin_lock_irqsave(&up->port.lock, *flags); -} - -static void transmit_chars(struct uart_hsu_port *up) -{ - struct circ_buf *xmit = &up->port.state->xmit; - int count; - - if (up->port.x_char) { - serial_out(up, UART_TX, up->port.x_char); - up->port.icount.tx++; - up->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { - serial_hsu_stop_tx(&up->port); - return; - } - - /* The IRQ is for TX FIFO half-empty */ - count = up->port.fifosize / 2; - - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - - up->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); - - if (uart_circ_empty(xmit)) - serial_hsu_stop_tx(&up->port); -} - -static inline void check_modem_status(struct uart_hsu_port *up) -{ - int status; - - status = serial_in(up, UART_MSR); - - if ((status & UART_MSR_ANY_DELTA) == 0) - return; - - if (status & UART_MSR_TERI) - up->port.icount.rng++; - if (status & UART_MSR_DDSR) - up->port.icount.dsr++; - /* We may only get DDCD when HW init and reset */ - if (status & UART_MSR_DDCD) - uart_handle_dcd_change(&up->port, status & UART_MSR_DCD); - /* Will start/stop_tx accordingly */ - if (status & UART_MSR_DCTS) - uart_handle_cts_change(&up->port, status & UART_MSR_CTS); - - wake_up_interruptible(&up->port.state->port.delta_msr_wait); -} - -/* - * This handles the interrupt from one port. 
- */ -static irqreturn_t port_irq(int irq, void *dev_id) -{ - struct uart_hsu_port *up = dev_id; - unsigned int iir, lsr; - unsigned long flags; - - if (unlikely(!up->running)) - return IRQ_NONE; - - spin_lock_irqsave(&up->port.lock, flags); - if (up->use_dma) { - lsr = serial_in(up, UART_LSR); - if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) - dev_warn(up->dev, - "Got lsr irq while using DMA, lsr = 0x%2x\n", - lsr); - check_modem_status(up); - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_HANDLED; - } - - iir = serial_in(up, UART_IIR); - if (iir & UART_IIR_NO_INT) { - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_NONE; - } - - lsr = serial_in(up, UART_LSR); - if (lsr & UART_LSR_DR) - receive_chars(up, &lsr, &flags); - check_modem_status(up); - - /* lsr will be renewed during the receive_chars */ - if (lsr & UART_LSR_THRE) - transmit_chars(up); - - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_HANDLED; -} - -static inline void dma_chan_irq(struct hsu_dma_chan *chan) -{ - struct uart_hsu_port *up = chan->uport; - unsigned long flags; - u32 int_sts; - - spin_lock_irqsave(&up->port.lock, flags); - - if (!up->use_dma || !up->running) - goto exit; - - /* - * No matter what situation, need read clear the IRQ status - * There is a bug, see Errata 5, HSD 2900918 - */ - int_sts = chan_readl(chan, HSU_CH_SR); - - /* Rx channel */ - if (chan->dirt == DMA_FROM_DEVICE) - hsu_dma_rx(up, int_sts, &flags); - - /* Tx channel */ - if (chan->dirt == DMA_TO_DEVICE) { - chan_writel(chan, HSU_CH_CR, 0x0); - up->dma_tx_on = 0; - hsu_dma_tx(up); - } - -exit: - spin_unlock_irqrestore(&up->port.lock, flags); - return; -} - -static irqreturn_t dma_irq(int irq, void *dev_id) -{ - struct hsu_port *hsu = dev_id; - u32 int_sts, i; - - int_sts = mfd_readl(hsu, HSU_GBL_DMAISR); - - /* Currently we only have 6 channels may be used */ - for (i = 0; i < 6; i++) { - if (int_sts & 0x1) - dma_chan_irq(&hsu->chans[i]); - int_sts >>= 1; - } - - return IRQ_HANDLED; -} - -static unsigned int serial_hsu_tx_empty(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - unsigned int ret; - - spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? 
TIOCSER_TEMT : 0; - spin_unlock_irqrestore(&up->port.lock, flags); - - return ret; -} - -static unsigned int serial_hsu_get_mctrl(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char status; - unsigned int ret; - - status = serial_in(up, UART_MSR); - - ret = 0; - if (status & UART_MSR_DCD) - ret |= TIOCM_CAR; - if (status & UART_MSR_RI) - ret |= TIOCM_RNG; - if (status & UART_MSR_DSR) - ret |= TIOCM_DSR; - if (status & UART_MSR_CTS) - ret |= TIOCM_CTS; - return ret; -} - -static void serial_hsu_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char mcr = 0; - - if (mctrl & TIOCM_RTS) - mcr |= UART_MCR_RTS; - if (mctrl & TIOCM_DTR) - mcr |= UART_MCR_DTR; - if (mctrl & TIOCM_OUT1) - mcr |= UART_MCR_OUT1; - if (mctrl & TIOCM_OUT2) - mcr |= UART_MCR_OUT2; - if (mctrl & TIOCM_LOOP) - mcr |= UART_MCR_LOOP; - - mcr |= up->mcr; - - serial_out(up, UART_MCR, mcr); -} - -static void serial_hsu_break_ctl(struct uart_port *port, int break_state) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); - if (break_state == -1) - up->lcr |= UART_LCR_SBC; - else - up->lcr &= ~UART_LCR_SBC; - serial_out(up, UART_LCR, up->lcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -/* - * What special to do: - * 1. chose the 64B fifo mode - * 2. start dma or pio depends on configuration - * 3. we only allocate dma memory when needed - */ -static int serial_hsu_startup(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - pm_runtime_get_sync(up->dev); - - /* - * Clear the FIFO buffers and disable them. - * (they will be reenabled in set_termios()) - */ - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(up, UART_FCR, 0); - - /* Clear the interrupt registers. */ - (void) serial_in(up, UART_LSR); - (void) serial_in(up, UART_RX); - (void) serial_in(up, UART_IIR); - (void) serial_in(up, UART_MSR); - - /* Now, initialize the UART, default is 8n1 */ - serial_out(up, UART_LCR, UART_LCR_WLEN8); - - spin_lock_irqsave(&up->port.lock, flags); - - up->port.mctrl |= TIOCM_OUT2; - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - - /* - * Finally, enable interrupts. Note: Modem status interrupts - * are set via set_termios(), which will be occurring imminently - * anyway, so we don't enable them here. 
- */ - if (!up->use_dma) - up->ier = UART_IER_RLSI | UART_IER_RDI | UART_IER_RTOIE; - else - up->ier = 0; - serial_out(up, UART_IER, up->ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - - /* DMA init */ - if (up->use_dma) { - struct hsu_dma_buffer *dbuf; - struct circ_buf *xmit = &port->state->xmit; - - up->dma_tx_on = 0; - - /* First allocate the RX buffer */ - dbuf = &up->rxbuf; - dbuf->buf = kzalloc(HSU_DMA_BUF_SIZE, GFP_KERNEL); - if (!dbuf->buf) { - up->use_dma = 0; - goto exit; - } - dbuf->dma_addr = dma_map_single(port->dev, - dbuf->buf, - HSU_DMA_BUF_SIZE, - DMA_FROM_DEVICE); - dbuf->dma_size = HSU_DMA_BUF_SIZE; - - /* Start the RX channel right now */ - hsu_dma_start_rx_chan(up->rxc, dbuf); - - /* Next init the TX DMA */ - dbuf = &up->txbuf; - dbuf->buf = xmit->buf; - dbuf->dma_addr = dma_map_single(port->dev, - dbuf->buf, - UART_XMIT_SIZE, - DMA_TO_DEVICE); - dbuf->dma_size = UART_XMIT_SIZE; - - /* This should not be changed all around */ - chan_writel(up->txc, HSU_CH_BSR, 32); - chan_writel(up->txc, HSU_CH_MOTSR, 4); - dbuf->ofs = 0; - } - -exit: - /* And clear the interrupt registers again for luck. */ - (void) serial_in(up, UART_LSR); - (void) serial_in(up, UART_RX); - (void) serial_in(up, UART_IIR); - (void) serial_in(up, UART_MSR); - - up->running = 1; - return 0; -} - -static void serial_hsu_shutdown(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - /* Disable interrupts from this port */ - up->ier = 0; - serial_out(up, UART_IER, 0); - up->running = 0; - - spin_lock_irqsave(&up->port.lock, flags); - up->port.mctrl &= ~TIOCM_OUT2; - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - spin_unlock_irqrestore(&up->port.lock, flags); - - /* Disable break condition and FIFOs */ - serial_out(up, UART_LCR, serial_in(up, UART_LCR) & ~UART_LCR_SBC); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT); - serial_out(up, UART_FCR, 0); - - pm_runtime_put(up->dev); -} - -static void -serial_hsu_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char cval, fcr = 0; - unsigned long flags; - unsigned int baud, quot; - u32 ps, mul; - - switch (termios->c_cflag & CSIZE) { - case CS5: - cval = UART_LCR_WLEN5; - break; - case CS6: - cval = UART_LCR_WLEN6; - break; - case CS7: - cval = UART_LCR_WLEN7; - break; - default: - case CS8: - cval = UART_LCR_WLEN8; - break; - } - - /* CMSPAR isn't supported by this driver */ - termios->c_cflag &= ~CMSPAR; - - if (termios->c_cflag & CSTOPB) - cval |= UART_LCR_STOP; - if (termios->c_cflag & PARENB) - cval |= UART_LCR_PARITY; - if (!(termios->c_cflag & PARODD)) - cval |= UART_LCR_EPAR; - - /* - * The base clk is 50Mhz, and the baud rate come from: - * baud = 50M * MUL / (DIV * PS * DLAB) - * - * For those basic low baud rate we can get the direct - * scalar from 2746800, like 115200 = 2746800/24. 
For those - * higher baud rate, we handle them case by case, mainly by - * adjusting the MUL/PS registers, and DIV register is kept - * as default value 0x3d09 to make things simple - */ - baud = uart_get_baud_rate(port, termios, old, 0, 4000000); - - quot = 1; - ps = 0x10; - mul = 0x3600; - switch (baud) { - case 3500000: - mul = 0x3345; - ps = 0xC; - break; - case 1843200: - mul = 0x2400; - break; - case 3000000: - case 2500000: - case 2000000: - case 1500000: - case 1000000: - case 500000: - /* mul/ps/quot = 0x9C4/0x10/0x1 will make a 500000 bps */ - mul = baud / 500000 * 0x9C4; - break; - default: - /* Use uart_get_divisor to get quot for other baud rates */ - quot = 0; - } - - if (!quot) - quot = uart_get_divisor(port, baud); - - if ((up->port.uartclk / quot) < (2400 * 16)) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_1B; - else if ((up->port.uartclk / quot) < (230400 * 16)) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_16B; - else - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_32B; - - fcr |= UART_FCR_HSU_64B_FIFO; - - /* - * Ok, we're now changing the port state. Do it with - * interrupts disabled. - */ - spin_lock_irqsave(&up->port.lock, flags); - - /* Update the per-port timeout */ - uart_update_timeout(port, termios->c_cflag, baud); - - up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; - if (termios->c_iflag & INPCK) - up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - up->port.read_status_mask |= UART_LSR_BI; - - /* Characters to ignore */ - up->port.ignore_status_mask = 0; - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; - if (termios->c_iflag & IGNBRK) { - up->port.ignore_status_mask |= UART_LSR_BI; - /* - * If we're ignoring parity and break indicators, - * ignore overruns too (for real raw support). 
- */ - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_OE; - } - - /* Ignore all characters if CREAD is not set */ - if ((termios->c_cflag & CREAD) == 0) - up->port.ignore_status_mask |= UART_LSR_DR; - - /* - * CTS flow control flag and modem status interrupts, disable - * MSI by default - */ - up->ier &= ~UART_IER_MSI; - if (UART_ENABLE_MS(&up->port, termios->c_cflag)) - up->ier |= UART_IER_MSI; - - serial_out(up, UART_IER, up->ier); - - if (termios->c_cflag & CRTSCTS) - up->mcr |= UART_MCR_AFE | UART_MCR_RTS; - else - up->mcr &= ~UART_MCR_AFE; - - serial_out(up, UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ - serial_out(up, UART_DLL, quot & 0xff); /* LS of divisor */ - serial_out(up, UART_DLM, quot >> 8); /* MS of divisor */ - serial_out(up, UART_LCR, cval); /* reset DLAB */ - serial_out(up, UART_MUL, mul); /* set MUL */ - serial_out(up, UART_PS, ps); /* set PS */ - up->lcr = cval; /* Save LCR */ - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - serial_out(up, UART_FCR, fcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -static void -serial_hsu_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ -} - -static void serial_hsu_release_port(struct uart_port *port) -{ -} - -static int serial_hsu_request_port(struct uart_port *port) -{ - return 0; -} - -static void serial_hsu_config_port(struct uart_port *port, int flags) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - up->port.type = PORT_MFD; -} - -static int -serial_hsu_verify_port(struct uart_port *port, struct serial_struct *ser) -{ - /* We don't want the core code to modify any port params */ - return -EINVAL; -} - -static const char * -serial_hsu_type(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - return up->name; -} - -/* Mainly for uart console use */ -static struct uart_hsu_port *serial_hsu_ports[3]; -static struct uart_driver serial_hsu_reg; - -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -/* Wait for transmitter & holding register to empty */ -static inline void wait_for_xmitr(struct uart_hsu_port *up) -{ - unsigned int status, tmout = 1000; - - /* Wait up to 1ms for the character to be sent. */ - do { - status = serial_in(up, UART_LSR); - - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; - - if (--tmout == 0) - break; - udelay(1); - } while (!(status & BOTH_EMPTY)); - - /* Wait up to 1s for flow control if necessary */ - if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (--tmout && - ((serial_in(up, UART_MSR) & UART_MSR_CTS) == 0)) - udelay(1); - } -} - -static void serial_hsu_console_putchar(struct uart_port *port, int ch) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - wait_for_xmitr(up); - serial_out(up, UART_TX, ch); -} - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... - * - * The console_lock must be held when we get here. 
- */ -static void -serial_hsu_console_write(struct console *co, const char *s, unsigned int count) -{ - struct uart_hsu_port *up = serial_hsu_ports[co->index]; - unsigned long flags; - unsigned int ier; - int locked = 1; - - touch_nmi_watchdog(); - - local_irq_save(flags); - if (up->port.sysrq) - locked = 0; - else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); - - /* First save the IER then disable the interrupts */ - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); - - uart_console_write(&up->port, s, count, serial_hsu_console_putchar); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up); - serial_out(up, UART_IER, ier); - - if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); -} - -static struct console serial_hsu_console; - -static int __init -serial_hsu_console_setup(struct console *co, char *options) -{ - struct uart_hsu_port *up; - int baud = 115200; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - - if (co->index == -1 || co->index >= serial_hsu_reg.nr) - co->index = 0; - up = serial_hsu_ports[co->index]; - if (!up) - return -ENODEV; - - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - - return uart_set_options(&up->port, co, baud, parity, bits, flow); -} - -static struct console serial_hsu_console = { - .name = "ttyMFD", - .write = serial_hsu_console_write, - .device = uart_console_device, - .setup = serial_hsu_console_setup, - .flags = CON_PRINTBUFFER, - .index = -1, - .data = &serial_hsu_reg, -}; - -#define SERIAL_HSU_CONSOLE (&serial_hsu_console) -#else -#define SERIAL_HSU_CONSOLE NULL -#endif - -static struct uart_ops serial_hsu_pops = { - .tx_empty = serial_hsu_tx_empty, - .set_mctrl = serial_hsu_set_mctrl, - .get_mctrl = serial_hsu_get_mctrl, - .stop_tx = serial_hsu_stop_tx, - .start_tx = serial_hsu_start_tx, - .stop_rx = serial_hsu_stop_rx, - .enable_ms = serial_hsu_enable_ms, - .break_ctl = serial_hsu_break_ctl, - .startup = serial_hsu_startup, - .shutdown = serial_hsu_shutdown, - .set_termios = serial_hsu_set_termios, - .pm = serial_hsu_pm, - .type = serial_hsu_type, - .release_port = serial_hsu_release_port, - .request_port = serial_hsu_request_port, - .config_port = serial_hsu_config_port, - .verify_port = serial_hsu_verify_port, -}; - -static struct uart_driver serial_hsu_reg = { - .owner = THIS_MODULE, - .driver_name = "MFD serial", - .dev_name = "ttyMFD", - .major = TTY_MAJOR, - .minor = 128, - .nr = 3, - .cons = SERIAL_HSU_CONSOLE, -}; - -#ifdef CONFIG_PM -static int serial_hsu_suspend(struct pci_dev *pdev, pm_message_t state) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - - /* Make sure this is not the internal dma controller */ - if (priv && (pdev->device != 0x081E)) { - up = priv; - uart_suspend_port(&serial_hsu_reg, &up->port); - } - - pci_save_state(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; -} - -static int serial_hsu_resume(struct pci_dev *pdev) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - int ret; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - ret = pci_enable_device(pdev); - if (ret) - dev_warn(&pdev->dev, - "HSU: can't re-enable device, try to continue\n"); - - if (priv && (pdev->device != 0x081E)) { - up = priv; - uart_resume_port(&serial_hsu_reg, &up->port); - } - return 0; -} - -static int serial_hsu_runtime_idle(struct device *dev) -{ - pm_schedule_suspend(dev, 500); - 
return -EBUSY; -} - -static int serial_hsu_runtime_suspend(struct device *dev) -{ - return 0; -} - -static int serial_hsu_runtime_resume(struct device *dev) -{ - return 0; -} -#else -#define serial_hsu_suspend NULL -#define serial_hsu_resume NULL -#define serial_hsu_runtime_idle NULL -#define serial_hsu_runtime_suspend NULL -#define serial_hsu_runtime_resume NULL -#endif - -static const struct dev_pm_ops serial_hsu_pm_ops = { - .runtime_suspend = serial_hsu_runtime_suspend, - .runtime_resume = serial_hsu_runtime_resume, - .runtime_idle = serial_hsu_runtime_idle, -}; - -/* temp global pointer before we settle down on using one or four PCI dev */ -static struct hsu_port *phsu; - -static int serial_hsu_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct uart_hsu_port *uport; - int index, ret; - - printk(KERN_INFO "HSU: found PCI Serial controller(ID: %04x:%04x)\n", - pdev->vendor, pdev->device); - - switch (pdev->device) { - case 0x081B: - index = 0; - break; - case 0x081C: - index = 1; - break; - case 0x081D: - index = 2; - break; - case 0x081E: - /* internal DMA controller */ - index = 3; - break; - default: - dev_err(&pdev->dev, "HSU: out of index!"); - return -ENODEV; - } - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - if (index == 3) { - /* DMA controller */ - ret = request_irq(pdev->irq, dma_irq, 0, "hsu_dma", phsu); - if (ret) { - dev_err(&pdev->dev, "can not get IRQ\n"); - goto err_disable; - } - pci_set_drvdata(pdev, phsu); - } else { - /* UART port 0~2 */ - uport = &phsu->port[index]; - uport->port.irq = pdev->irq; - uport->port.dev = &pdev->dev; - uport->dev = &pdev->dev; - - ret = request_irq(pdev->irq, port_irq, 0, uport->name, uport); - if (ret) { - dev_err(&pdev->dev, "can not get IRQ\n"); - goto err_disable; - } - uart_add_one_port(&serial_hsu_reg, &uport->port); - - pci_set_drvdata(pdev, uport); - } - - pm_runtime_put_noidle(&pdev->dev); - pm_runtime_allow(&pdev->dev); - - return 0; - -err_disable: - pci_disable_device(pdev); - return ret; -} - -static void hsu_global_init(void) -{ - struct hsu_port *hsu; - struct uart_hsu_port *uport; - struct hsu_dma_chan *dchan; - int i, ret; - - hsu = kzalloc(sizeof(struct hsu_port), GFP_KERNEL); - if (!hsu) - return; - - /* Get basic io resource and map it */ - hsu->paddr = 0xffa28000; - hsu->iolen = 0x1000; - - if (!(request_mem_region(hsu->paddr, hsu->iolen, "HSU global"))) - pr_warn("HSU: error in request mem region\n"); - - hsu->reg = ioremap_nocache((unsigned long)hsu->paddr, hsu->iolen); - if (!hsu->reg) { - pr_err("HSU: error in ioremap\n"); - ret = -ENOMEM; - goto err_free_region; - } - - /* Initialise the 3 UART ports */ - uport = hsu->port; - for (i = 0; i < 3; i++) { - uport->port.type = PORT_MFD; - uport->port.iotype = UPIO_MEM; - uport->port.mapbase = (resource_size_t)hsu->paddr - + HSU_PORT_REG_OFFSET - + i * HSU_PORT_REG_LENGTH; - uport->port.membase = hsu->reg + HSU_PORT_REG_OFFSET - + i * HSU_PORT_REG_LENGTH; - - sprintf(uport->name, "hsu_port%d", i); - uport->port.fifosize = 64; - uport->port.ops = &serial_hsu_pops; - uport->port.line = i; - uport->port.flags = UPF_IOREMAP; - /* set the scalable maxim support rate to 2746800 bps */ - uport->port.uartclk = 115200 * 24 * 16; - - uport->running = 0; - uport->txc = &hsu->chans[i * 2]; - uport->rxc = &hsu->chans[i * 2 + 1]; - - serial_hsu_ports[i] = uport; - uport->index = i; - - if (hsu_dma_enable & (1<<i)) - uport->use_dma = 1; - else - uport->use_dma = 0; - - uport++; - } - - /* Initialise 6 dma channels */ - dchan = hsu->chans; - for (i = 0; i <
6; i++) { - dchan->id = i; - dchan->dirt = (i & 0x1) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - dchan->uport = &hsu->port[i/2]; - dchan->reg = hsu->reg + HSU_DMA_CHANS_REG_OFFSET + - i * HSU_DMA_CHANS_REG_LENGTH; - - dchan++; - } - - phsu = hsu; - hsu_debugfs_init(hsu); - return; - -err_free_region: - release_mem_region(hsu->paddr, hsu->iolen); - kfree(hsu); - return; -} - -static void serial_hsu_remove(struct pci_dev *pdev) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - - if (!priv) - return; - - pm_runtime_forbid(&pdev->dev); - pm_runtime_get_noresume(&pdev->dev); - - /* For port 0/1/2, priv is the address of uart_hsu_port */ - if (pdev->device != 0x081E) { - up = priv; - uart_remove_one_port(&serial_hsu_reg, &up->port); - } - - free_irq(pdev->irq, priv); - pci_disable_device(pdev); -} - -/* First 3 are UART ports, and the 4th is the DMA */ -static const struct pci_device_id pci_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081B) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081C) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081D) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081E) }, - {}, -}; - -static struct pci_driver hsu_pci_driver = { - .name = "HSU serial", - .id_table = pci_ids, - .probe = serial_hsu_probe, - .remove = serial_hsu_remove, - .suspend = serial_hsu_suspend, - .resume = serial_hsu_resume, - .driver = { - .pm = &serial_hsu_pm_ops, - }, -}; - -static int __init hsu_pci_init(void) -{ - int ret; - - hsu_global_init(); - - ret = uart_register_driver(&serial_hsu_reg); - if (ret) - return ret; - - return pci_register_driver(&hsu_pci_driver); -} - -static void __exit hsu_pci_exit(void) -{ - pci_unregister_driver(&hsu_pci_driver); - uart_unregister_driver(&serial_hsu_reg); - - hsu_debugfs_remove(phsu); - - kfree(phsu); -} - -module_init(hsu_pci_init); -module_exit(hsu_pci_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, pci_ids); diff --git a/include/linux/serial_mfd.h b/include/linux/serial_mfd.h deleted file mode 100644 index 2b071e0b034d..000000000000 --- a/include/linux/serial_mfd.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _SERIAL_MFD_H_ -#define _SERIAL_MFD_H_ - -/* HW register offset definition */ -#define UART_FOR 0x08 -#define UART_PS 0x0C -#define UART_MUL 0x0D -#define UART_DIV 0x0E - -#define HSU_GBL_IEN 0x0 -#define HSU_GBL_IST 0x4 - -#define HSU_GBL_INT_BIT_PORT0 0x0 -#define HSU_GBL_INT_BIT_PORT1 0x1 -#define HSU_GBL_INT_BIT_PORT2 0x2 -#define HSU_GBL_INT_BIT_IRI 0x3 -#define HSU_GBL_INT_BIT_HDLC 0x4 -#define HSU_GBL_INT_BIT_DMA 0x5 - -#define HSU_GBL_ISR 0x8 -#define HSU_GBL_DMASR 0x400 -#define HSU_GBL_DMAISR 0x404 - -#define HSU_PORT_REG_OFFSET 0x80 -#define HSU_PORT0_REG_OFFSET 0x80 -#define HSU_PORT1_REG_OFFSET 0x100 -#define HSU_PORT2_REG_OFFSET 0x180 -#define HSU_PORT_REG_LENGTH 0x80 - -#define HSU_DMA_CHANS_REG_OFFSET 0x500 -#define HSU_DMA_CHANS_REG_LENGTH 0x40 - -#define HSU_CH_SR 0x0 /* channel status reg */ -#define HSU_CH_CR 0x4 /* control reg */ -#define HSU_CH_DCR 0x8 /* descriptor control reg */ -#define HSU_CH_BSR 0x10 /* max fifo buffer size reg */ -#define HSU_CH_MOTSR 0x14 /* minimum ocp transfer size */ -#define HSU_CH_D0SAR 0x20 /* desc 0 start addr */ -#define HSU_CH_D0TSR 0x24 /* desc 0 transfer size */ -#define HSU_CH_D1SAR 0x28 -#define HSU_CH_D1TSR 0x2C -#define HSU_CH_D2SAR 0x30 -#define HSU_CH_D2TSR 0x34 -#define HSU_CH_D3SAR 0x38 -#define HSU_CH_D3TSR 0x3C - -#endif diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 00adb01fa5f3..e9b4cb0cd7ed 100644 --- 
a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -241,25 +241,6 @@ #define UART_FCR_PXAR16 0x80 /* receive FIFO threshold = 16 */ #define UART_FCR_PXAR32 0xc0 /* receive FIFO threshold = 32 */ -/* - * Intel MID on-chip HSU (High Speed UART) defined bits - */ -#define UART_FCR_HSU_64_1B 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_HSU_64_16B 0x40 /* receive FIFO treshold = 16 */ -#define UART_FCR_HSU_64_32B 0x80 /* receive FIFO treshold = 32 */ -#define UART_FCR_HSU_64_56B 0xc0 /* receive FIFO treshold = 56 */ - -#define UART_FCR_HSU_16_1B 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_HSU_16_4B 0x40 /* receive FIFO treshold = 4 */ -#define UART_FCR_HSU_16_8B 0x80 /* receive FIFO treshold = 8 */ -#define UART_FCR_HSU_16_14B 0xc0 /* receive FIFO treshold = 14 */ - -#define UART_FCR_HSU_64B_FIFO 0x20 /* chose 64 bytes FIFO */ -#define UART_FCR_HSU_16B_FIFO 0x00 /* chose 16 bytes FIFO */ - -#define UART_FCR_HALF_EMPT_TXI 0x00 /* trigger TX_EMPT IRQ for half empty */ -#define UART_FCR_FULL_EMPT_TXI 0x08 /* trigger TX_EMPT IRQ for full empty */ - /* * These register definitions are for the 16C950 */ -- cgit v1.2.3 From 079119a2c7c83506ad753e7b95e9ed253e9963f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Mar 2015 13:52:53 +0200 Subject: serial, x86: use UPF_* constants for flags This patch fixes the following sparse warnings: drivers/tty/serial/8250/8250_core.c:3231:32: warning: incorrect type in assignment (different base types) drivers/tty/serial/8250/8250_core.c:3231:32: expected restricted upf_t [usertype] flags drivers/tty/serial/8250/8250_core.c:3231:32: got unsigned int const [unsigned] flags Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/serial.h | 8 ++++---- drivers/tty/serial/8250/8250.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h index 460b84f64556..8378b8c9109c 100644 --- a/arch/x86/include/asm/serial.h +++ b/arch/x86/include/asm/serial.h @@ -12,11 +12,11 @@ /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_DETECT_IRQ -# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | ASYNC_AUTO_IRQ) +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | UPF_AUTO_IRQ) #else -# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | 0 ) -# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | 0 ) +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | 0 ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | 0 ) #endif #define SERIAL_PORT_DFNS \ diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index b00836851061..656ecc60b5b2 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -53,7 +53,7 @@ struct old_serial_port { unsigned int baud_base; unsigned int port; unsigned int irq; - unsigned int flags; + upf_t flags; unsigned char hub6; unsigned char io_type; unsigned char __iomem *iomem_base; -- cgit v1.2.3 From 88ad1a147e2c84d33cb50f5ebff1ece5e0cd4383 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Mar 2015 14:18:12 +1030 Subject: lguest: fix pending interrupt test. Denys says: TEST with zero will always set ZF. Thus, "jnz send_interrupts" never jumps. We get interrupts regularly enough that this didn't cause immediate problems. 
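To see why the testl version can never branch, here is a minimal user-space C sketch (an illustration only, not part of the patch; the helper names are invented) of how the two instructions derive ZF:

#include <stdio.h>

/* Invented helpers modelling how the two x86 instructions set ZF;
 * the real code is the assembly in head_32.S below. */
static int zf_after_testl(unsigned int imm, unsigned int mem)
{
	return (mem & imm) == 0;	/* TEST: ZF set iff (mem AND imm) == 0 */
}

static int zf_after_cmpl(unsigned int imm, unsigned int mem)
{
	return (mem - imm) == 0;	/* CMP: ZF set iff (mem - imm) == 0 */
}

int main(void)
{
	/* Suppose an interrupt is pending: irq_pending == X86_EFLAGS_IF. */
	unsigned int irq_pending = 0x200;

	/* testl $0, mem: mem & 0 is always 0, so ZF=1 and "jnz" never jumps. */
	printf("testl $0: ZF=%d\n", zf_after_testl(0, irq_pending));

	/* cmpl $0, mem: ZF reflects whether mem is 0, so "jnz" jumps here. */
	printf("cmpl  $0: ZF=%d\n", zf_after_cmpl(0, irq_pending));
	return 0;
}

With testl, ANDing against the zero immediate always yields zero, so ZF is always set and the pending-interrupt path is unreachable; cmpl subtracts the zero immediate, so ZF is clear exactly when irq_pending is non-zero.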
Reported-by: Denys Vlasenko Signed-off-by: Rusty Russell --- arch/x86/lguest/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 6ddfe4fc23c3..05b0a85507ce 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -84,7 +84,7 @@ ENTRY(lg_irq_enable) * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we * jump to send_interrupts, otherwise we're done. */ - testl $0, lguest_data+LGUEST_DATA_irq_pending + cmpl $0, lguest_data+LGUEST_DATA_irq_pending jnz send_interrupts /* * One cool thing about x86 is that you can do many things without using -- cgit v1.2.3 From 41f055d49cb04d648a89a2cb6d57c94ff86d5bb6 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Tue, 24 Mar 2015 11:51:38 +1030 Subject: lguest: rename i386_head.S in the comments i386_head.S was renamed to head_32.S, so let's update the comments to match. Signed-off-by: Alexander Kuleshov Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ac4453d8520e..543510a2f9e0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -262,7 +262,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl); PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable); /*:*/ -/* These are in i386_head.S */ +/* These are in head_32.S */ extern void lg_irq_enable(void); extern void lg_restore_fl(unsigned long flags); @@ -1366,7 +1366,7 @@ static void lguest_restart(char *reason) * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, - * and these are in i386_head.S. + * and these are in head_32.S. */ /*G:060 We construct a table from the assembler templates: */ -- cgit v1.2.3 From 7042cb4eb30967b5eb9eeba04907882f04d6b6e5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 24 Mar 2015 11:51:39 +1030 Subject: lguest: simplify lguest_iret Signed-off-by: Denys Vlasenko CC: lguest@lists.ozlabs.org CC: x86@kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Rusty Russell --- arch/x86/lguest/head_32.S | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 05b0a85507ce..81678bf0fcb7 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -168,29 +168,28 @@ ENTRY(lg_restore_fl) * So we have to copy eflags from the stack to lguest_data.irq_enabled before * we do the "iret". * - * There are two problems with this: firstly, we need to use a register to do - * the copy and secondly, the whole thing needs to be atomic. The first - * problem is easy to solve: push %eax on the stack so we can use it, and then - * restore it at the end just before the real "iret". + * There are two problems with this: firstly, we can't clobber any registers + * and secondly, the whole thing needs to be atomic. The first problem + * is solved by using "push memory"/"pop memory" instruction pair for copying. * * The second is harder: copying eflags to lguest_data.irq_enabled will turn * interrupts on before we're finished, so we could be interrupted before we * return to userspace or wherever. Our solution to this is to surround the * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be - * enabled.
(It's not necessary to protect pop instruction, since + * data gets updated only after it completes, so we end up surrounding + * just one instruction, iret). */ ENTRY(lguest_iret) - pushl %eax - movl 12(%esp), %eax -lguest_noirq_start: + pushl 2*4(%esp) /* * Note the %ss: segment prefix here. Normal data accesses use the * "ds" segment, but that will have already been restored for whatever * we're returning to (such as userspace): we can't trust it. The %ss: * prefix makes sure we use the stack segment, which is still valid. */ - movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled - popl %eax + popl %ss:lguest_data+LGUEST_DATA_irq_enabled +lguest_noirq_start: iret lguest_noirq_end: -- cgit v1.2.3 From 2f921b5bb0511fb698681d8ef35c48be7a9116bf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 24 Mar 2015 11:51:39 +1030 Subject: lguest: suppress interrupts for single insn, not range. The last patch reduced our interrupt-suppression region to one address, so simplify the code somewhat. Also, remove the obsolete undefined instruction ranges and the comment which refers to lguest_guest.S instead of head_32.S. Signed-off-by: Rusty Russell --- arch/x86/include/asm/lguest.h | 7 ++----- arch/x86/lguest/boot.c | 3 +-- arch/x86/lguest/head_32.S | 15 ++++++--------- drivers/lguest/hypercalls.c | 5 ++--- drivers/lguest/interrupts_and_traps.c | 8 ++++---- drivers/lguest/lg.h | 2 +- include/linux/lguest.h | 4 ++-- 7 files changed, 18 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index e2d4a4afa8c3..3bbc07a57a31 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -20,13 +20,10 @@ extern unsigned long switcher_addr; /* Found in switcher.S */ extern unsigned long default_idt_entries[]; -/* Declarations for definitions in lguest_guest.S */ -extern char lguest_noirq_start[], lguest_noirq_end[]; +/* Declarations for definitions in arch/x86/lguest/head_32.S */ +extern char lguest_noirq_iret[]; extern const char lgstart_cli[], lgend_cli[]; -extern const char lgstart_sti[], lgend_sti[]; -extern const char lgstart_popf[], lgend_popf[]; extern const char lgstart_pushf[], lgend_pushf[]; -extern const char lgstart_iret[], lgend_iret[]; extern void lguest_iret(void); extern void lguest_init(void); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 543510a2f9e0..13616d708389 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -87,8 +87,7 @@ struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, - .noirq_start = (u32)lguest_noirq_start, - .noirq_end = (u32)lguest_noirq_end, + .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ .syscall_vec = SYSCALL_VECTOR, diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 81678bf0fcb7..d5ae63f5ec5d 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -133,9 +133,8 @@ ENTRY(lg_restore_fl) ret /*:*/ -/* These demark the EIP range where host should never deliver interrupts. */ -.global lguest_noirq_start -.global lguest_noirq_end +/* These demark the EIP where host should never deliver interrupts. 
*/ +.global lguest_noirq_iret /*M:004 * When the Host reflects a trap or injects an interrupt into the Guest, it @@ -174,12 +173,11 @@ ENTRY(lg_restore_fl) * * The second is harder: copying eflags to lguest_data.irq_enabled will turn * interrupts on before we're finished, so we could be interrupted before we - * return to userspace or wherever. Our solution to this is to surround the - * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the + * return to userspace or wherever. Our solution to this is to tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be * enabled. (It's not necessary to protect pop instruction, since - * data gets updated only after it completes, so we end up surrounding - * just one instruction, iret). + * data gets updated only after it completes, so we only need to protect + * one instruction, iret). */ ENTRY(lguest_iret) pushl 2*4(%esp) @@ -190,6 +188,5 @@ ENTRY(lguest_iret) * prefix makes sure we use the stack segment, which is still valid. */ popl %ss:lguest_data+LGUEST_DATA_irq_enabled -lguest_noirq_start: +lguest_noirq_iret: iret -lguest_noirq_end: diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 1219af493c0f..19a32280731d 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -211,10 +211,9 @@ static void initialize(struct lg_cpu *cpu) /* * The Guest tells us where we're not to deliver interrupts by putting - * the range of addresses into "struct lguest_data". + * the instruction address into "struct lguest_data". */ - if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) - || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) + if (get_user(cpu->lg->noirq_iret, &cpu->lg->lguest_data->noirq_iret)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 70dfcdc29f1f..6d4c072b61e1 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -204,8 +204,7 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) * They may be in the middle of an iret, where they asked us never to * deliver interrupts. */ - if (cpu->regs->eip >= cpu->lg->noirq_start && - (cpu->regs->eip < cpu->lg->noirq_end)) + if (cpu->regs->eip == cpu->lg->noirq_iret) return; /* If they're halted, interrupts restart them. */ @@ -395,8 +394,9 @@ static bool direct_trap(unsigned int num) * The Guest has the ability to turn its interrupt gates into trap gates, * if it is careful. The Host will let trap gates can go directly to the * Guest, but the Guest needs the interrupts atomically disabled for an - * interrupt gate. It can do this by pointing the trap gate at instructions - * within noirq_start and noirq_end, where it can safely disable interrupts. + * interrupt gate. The Host could provide a mechanism to register more + * "no-interrupt" regions, and the Guest could point the trap gate at + * instructions within that region, where it can safely disable interrupts. 
*/ /*M:006 diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 307e8b39e7d1..ac8ad0461e80 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -102,7 +102,7 @@ struct lguest { struct pgdir pgdirs[4]; - unsigned long noirq_start, noirq_end; + unsigned long noirq_iret; unsigned int stack_pages; u32 tsc_khz; diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 9962c6bb1311..6db19f35f7c5 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -61,8 +61,8 @@ struct lguest_data { u32 tsc_khz; /* Fields initialized by the Guest at boot: */ - /* Instruction range to suppress interrupts even if enabled */ - unsigned long noirq_start, noirq_end; + /* Instruction to suppress interrupts even if enabled */ + unsigned long noirq_iret; /* Address above which page tables are all identical. */ unsigned long kernel_address; /* The vector to try to use for system calls (0x40 or 0x80). */ -- cgit v1.2.3 From 6e0a0ea12962a2175a9f47621f9fe7a4c866cb12 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Tue, 24 Mar 2015 14:02:39 +0000 Subject: ACPI / sleep: Introduce CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT ACPI 5.1 does not currently support S states for ARM64 hardware but ACPI code will call acpi_target_system_state() and acpi_sleep_init() for device power management, so introduce CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT and select it for x86 and ia64 only to make sleep functions available, and also introduce stub function to allow other drivers to function until S states are defined for ARM64. It will be no functional change for x86 and IA64. Suggested-by: Rafael J. Wysocki Acked-by: Lorenzo Pieralisi Acked-by: Rafael J. Wysocki Signed-off-by: Graeme Gregory Signed-off-by: Tomasz Nowicki Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- arch/ia64/Kconfig | 1 + arch/x86/Kconfig | 1 + drivers/acpi/Kconfig | 4 ++++ drivers/acpi/Makefile | 2 +- drivers/acpi/internal.h | 4 ++++ 5 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 074e52bf815c..cc3414fda362 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -10,6 +10,7 @@ config IA64 select ARCH_MIGHT_HAVE_PC_SERIO select PCI if (!IA64_HP_SIM) select ACPI if (!IA64_HP_SIM) + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b7d31ca55187..c3ea9f9a29d1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_FAST_MULTIPLIER diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index e6c3ddd92665..a726381fea72 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -48,9 +48,13 @@ config ACPI_LEGACY_TABLES_LOOKUP config ARCH_MIGHT_HAVE_ACPI_PDC bool +config ACPI_SYSTEM_POWER_STATES_SUPPORT + bool + config ACPI_SLEEP bool depends on SUSPEND || HIBERNATION + depends on ACPI_SYSTEM_POWER_STATES_SUPPORT default y config ACPI_PROCFS_POWER diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 623b117ad1a2..db153c6a75d7 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -23,7 +23,7 @@ acpi-y += nvs.o # Power management related files acpi-y += wakeup.o -acpi-y += sleep.o +acpi-$(CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT) += sleep.o acpi-y += device_pm.o 
acpi-$(CONFIG_ACPI_SLEEP) += proc.o diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 56b321aa2b1c..ba4a61e964be 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -161,7 +161,11 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); /*-------------------------------------------------------------------------- Suspend/Resume -------------------------------------------------------------------------- */ +#ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT extern int acpi_sleep_init(void); +#else +static inline int acpi_sleep_init(void) { return -ENXIO; } +#endif #ifdef CONFIG_ACPI_SLEEP int acpi_sleep_proc_init(void); -- cgit v1.2.3 From 828aef376d7a129547bc4ebb949965040177e3da Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 24 Mar 2015 14:02:46 +0000 Subject: ACPI / processor: Introduce phys_cpuid_t for CPU hardware ID CPU hardware ID (phys_id) is defined as u32 in structure acpi_processor, but phys_id is used as int in acpi processor driver, so it will lead to some inconsistence for the drivers. Furthermore, to cater for ACPI arch ports that implement 64 bits CPU ids a generic CPU physical id type is required. So introduce typedef u32 phys_cpuid_t in a common file, and introduce a macro PHYS_CPUID_INVALID as (phys_cpuid_t)(-1) if it's not defined by other archs, this will solve the inconsistence in acpi processor driver, and will prepare for the ACPI on ARM64 for the 64 bit CPU hardware ID in the following patch. CC: Rafael J Wysocki Suggested-by: Lorenzo Pieralisi Reviewed-by: Grant Likely Acked-by: Sudeep Holla Acked-by: Lorenzo Pieralisi Acked-by: Rafael J. Wysocki Signed-off-by: Catalin Marinas [hj: reworked cpu physid map return codes] Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- arch/ia64/kernel/acpi.c | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 7 ++++--- drivers/acpi/processor_core.c | 30 +++++++++++++++--------------- include/acpi/processor.h | 6 +++--- include/linux/acpi.h | 7 ++++++- 6 files changed, 30 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 2c4498919d3c..067ef4439fa4 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -887,7 +887,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3d525c6124f6..e4f8582eb756 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 1020b1b53a17..58f335ca2e75 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -170,7 +170,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) acpi_status status; int ret; - if (pr->phys_id == -1) + if (pr->phys_id == PHYS_CPUID_INVALID) return -ENODEV; status = acpi_evaluate_integer(pr->handle, "_STA", 
NULL, &sta); @@ -215,7 +215,8 @@ static int acpi_processor_get_info(struct acpi_device *device) union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; struct acpi_processor *pr = acpi_driver_data(device); - int phys_id, cpu_index, device_declaration = 0; + phys_cpuid_t phys_id; + int cpu_index, device_declaration = 0; acpi_status status = AE_OK; static int cpu0_initialized; unsigned long long value; @@ -263,7 +264,7 @@ static int acpi_processor_get_info(struct acpi_device *device) } phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); - if (phys_id < 0) + if (phys_id == PHYS_CPUID_INVALID) acpi_handle_debug(pr->handle, "failed to get CPU physical ID.\n"); pr->phys_id = phys_id; diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 7962651cdbd4..51cc29909e08 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -32,7 +32,7 @@ static struct acpi_table_madt *get_madt_table(void) } static int map_lapic_id(struct acpi_subtable_header *entry, - u32 acpi_id, int *apic_id) + u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_apic *lapic = container_of(entry, struct acpi_madt_local_apic, header); @@ -48,7 +48,7 @@ static int map_lapic_id(struct acpi_subtable_header *entry, } static int map_x2apic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, int *apic_id) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_x2apic *apic = container_of(entry, struct acpi_madt_local_x2apic, header); @@ -65,7 +65,7 @@ static int map_x2apic_id(struct acpi_subtable_header *entry, } static int map_lsapic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, int *apic_id) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_sapic *lsapic = container_of(entry, struct acpi_madt_local_sapic, header); @@ -83,10 +83,10 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, return 0; } -static int map_madt_entry(int type, u32 acpi_id) +static phys_cpuid_t map_madt_entry(int type, u32 acpi_id) { unsigned long madt_end, entry; - int phys_id = -1; /* CPU hardware ID */ + phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */ struct acpi_table_madt *madt; madt = get_madt_table(); @@ -117,12 +117,12 @@ static int map_madt_entry(int type, u32 acpi_id) return phys_id; } -static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) +static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_subtable_header *header; - int phys_id = -1; + phys_cpuid_t phys_id = PHYS_CPUID_INVALID; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) goto exit; @@ -149,27 +149,27 @@ exit: return phys_id; } -int acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) +phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) { - int phys_id; + phys_cpuid_t phys_id; phys_id = map_mat_entry(handle, type, acpi_id); - if (phys_id == -1) + if (phys_id == PHYS_CPUID_INVALID) phys_id = map_madt_entry(type, acpi_id); return phys_id; } -int acpi_map_cpuid(int phys_id, u32 acpi_id) +int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id) { #ifdef CONFIG_SMP int i; #endif - if (phys_id == -1) { + if (phys_id == PHYS_CPUID_INVALID) { /* * On UP processor, there is no _MAT or MADT table. - * So above phys_id is always set to -1. 
+ * So above phys_id is always set to PHYS_CPUID_INVALID. * * BIOS may define multiple CPU handles even for UP processor. * For example, @@ -190,7 +190,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id) if (nr_cpu_ids <= 1 && acpi_id == 0) return acpi_id; else - return phys_id; + return -1; } #ifdef CONFIG_SMP @@ -208,7 +208,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id) int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) { - int phys_id; + phys_cpuid_t phys_id; phys_id = acpi_get_phys_id(handle, type, acpi_id); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index b95dc32a6e6b..4188a4d3b597 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -196,7 +196,7 @@ struct acpi_processor_flags { struct acpi_processor { acpi_handle handle; u32 acpi_id; - u32 phys_id; /* CPU hardware ID such as APIC ID for x86 */ + phys_cpuid_t phys_id; /* CPU hardware ID such as APIC ID for x86 */ u32 id; /* CPU logical ID allocated by OS */ u32 pblk; int performance_platform_limit; @@ -310,8 +310,8 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ /* in processor_core.c */ -int acpi_get_phys_id(acpi_handle, int type, u32 acpi_id); -int acpi_map_cpuid(int phys_id, u32 acpi_id); +phys_cpuid_t acpi_get_phys_id(acpi_handle, int type, u32 acpi_id); +int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id); int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); /* in processor_pdc.c */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 24c7aa8b1d20..6ec33c595aea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -146,9 +146,14 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); void acpi_numa_arch_fixup(void); +#ifndef PHYS_CPUID_INVALID +typedef u32 phys_cpuid_t; +#define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) +#endif + #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu); +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -- cgit v1.2.3 From 9e9c3fe40bcd28e3f98f0ad8408435f4503f2781 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 12 Apr 2015 21:47:15 +0300 Subject: KVM: x86: Fix MSR_IA32_BNDCFGS in msrs_to_save kvm_init_msr_list is currently called before hardware_setup. As a result, vmx_mpx_supported always returns false when kvm_init_msr_list checks whether to save MSR_IA32_BNDCFGS. Move kvm_init_msr_list after vmx_hardware_setup is called to fix this issue. 
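The failure mode here is the classic one of consulting a capability predicate before the code that computes it has run; a minimal standalone sketch of the ordering hazard, with hypothetical names rather than KVM's real symbols:

#include <stdbool.h>
#include <stdio.h>

static bool bndcfgs_supported;            /* filled in by hardware setup */

static void hardware_setup(void)
{
	bndcfgs_supported = true;         /* pretend the probe succeeded */
}

static void init_msr_list(void)
{
	/* Consulting the predicate too early silently drops the MSR. */
	printf("save MSR_IA32_BNDCFGS: %s\n",
	       bndcfgs_supported ? "yes" : "no");
}

int main(void)
{
	init_msr_list();   /* before setup: prints "no" (the bug) */
	hardware_setup();
	init_msr_list();   /* after setup: prints "yes" (the fix) */
	return 0;
}

Run as-is, the first call prints "no" and the second "yes", mirroring how vmx_mpx_supported() can only answer correctly once hardware setup has completed.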
Signed-off-by: Nadav Amit Message-Id: <1428864435-4732-1-git-send-email-namit@cs.technion.ac.il> Cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1a81267f3f6..ed31c31b2485 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5799,7 +5799,6 @@ int kvm_arch_init(void *opaque) kvm_set_mmio_spte_mask(); kvm_x86_ops = ops; - kvm_init_msr_list(); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0); @@ -7253,7 +7252,14 @@ void kvm_arch_hardware_disable(void) int kvm_arch_hardware_setup(void) { - return kvm_x86_ops->hardware_setup(); + int r; + + r = kvm_x86_ops->hardware_setup(); + if (r != 0) + return r; + + kvm_init_msr_list(); + return 0; } void kvm_arch_hardware_unsetup(void) -- cgit v1.2.3 From bea15428b9d6bc36e87288f168ab314619a66757 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Apr 2015 15:40:02 +0200 Subject: KVM: x86: cleanup kvm_irq_delivery_to_apic_fast Sparse is reporting a "we previously assumed 'src' could be null" error. This is true as far as the static analyzer can see, but in practice only IPIs can set shorthand to self and they also set 'src', so it's ok. Still, move the initialization of x2apic_ipi (and thus the NULL check for src right before the first use. While at it, initializing ret to "false" is somewhat confusing because of the almost immediate assigned of "true" to the same variable. Thus, initialize it to "true" and modify it in the only path that used to use the value from "bool ret = false". There is no change in generated code from this change. Reported-by: Dan Carpenter Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d67206a7b99a..629af0f1c5c4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, unsigned long bitmap = 1; struct kvm_lapic **dst; int i; - bool ret = false; - bool x2apic_ipi = src && apic_x2apic_mode(src); + bool ret, x2apic_ipi; *r = -1; @@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (irq->shorthand) return false; + x2apic_ipi = src && apic_x2apic_mode(src); if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) return false; + ret = true; rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (!map) + if (!map) { + ret = false; goto out; - - ret = true; + } if (irq->dest_mode == APIC_DEST_PHYSICAL) { if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) -- cgit v1.2.3 From 4d178f94ebe123d462a51169b53854cb7f198888 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 12 Apr 2015 09:14:45 -0400 Subject: x86/asm: Merge common 32-bit values in asm-offsets.c Merge common values for 32-bit native and compat. 
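For context, OFFSET() entries like the ones being merged here are consumed by the asm-offsets machinery: the C file is compiled to assembly (never assembled), and a kbuild script scrapes the constants out for .S files to use. A rough sketch of the technique, simplified from, and not identical to, the kernel's include/linux/kbuild.h:

#include <stddef.h>

/* Each entry plants a "->SYM value" marker line in the generated .s
 * file; a kbuild script greps those markers and rewrites each one as
 * "#define SYM value" in asm-offsets.h for assembly code to include. */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))
#define BLANK() asm volatile("\n->" : : )

struct example { int a; long b; };

void common_demo(void)
{
	OFFSET(EXAMPLE_b, example, b);	/* "#define EXAMPLE_b 8" on x86-64 */
	BLANK();
}

Because the markers are only ever read back out of the compiler's -S output, the "->" lines never have to be valid assembly.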
Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/1428844486-6638-1-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets.c | 19 +++++++++++++++++++ arch/x86/kernel/asm-offsets_32.c | 15 --------------- arch/x86/kernel/asm-offsets_64.c | 21 --------------------- 3 files changed, 19 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..b27f6ec90caa 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -41,6 +41,25 @@ void common(void) { OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + BLANK(); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); + + BLANK(); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + + BLANK(); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); +#endif + #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 47703aed74cf..628bfd4c06bb 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -17,17 +17,6 @@ void foo(void); void foo(void) { - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); @@ -37,7 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); OFFSET(TI_cpu, thread_info, cpu); BLANK(); @@ -60,9 +48,6 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5ce6f2da8763..dcaab87da629 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -29,27 +29,6 @@ int main(void) BLANK(); #endif -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); -- cgit v1.2.3 From 
14434052ffb3b7fe8f491e9d0a7793376fb79155 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 12 Apr 2015 09:14:46 -0400 Subject: x86/asm: Remove unused TI_cpu Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/1428844486-6638-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 628bfd4c06bb..6ce39025f467 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -26,9 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); -- cgit v1.2.3 From ff22e2010144b6aa050da35851f1fa79087cca06 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 12 Apr 2015 21:45:06 +0200 Subject: x86/asm, x86/power/hibernate: Use local labels in asm ... so that they don't appear in the object file and thus in objdump output. They're local anyway and have a meaning only within that file. No functionality change. Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: H. Peter Anvin Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: linux-pm@vger.kernel.org Link: http://lkml.kernel.org/r/1428867906-12016-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/power/hibernate_asm_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 3c4469a7a929..e2386cb4e0c3 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -78,9 +78,9 @@ ENTRY(restore_image) /* code below has been relocated to a safe page */ ENTRY(core_restore_code) -loop: +.Lloop: testq %rdx, %rdx - jz done + jz .Ldone /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi @@ -91,8 +91,8 @@ loop: /* progress to the next pbe */ movq pbe_next(%rdx), %rdx - jmp loop -done: + jmp .Lloop +.Ldone: /* jump to the restore_registers address from the image header */ jmpq *%rax /* -- cgit v1.2.3 From c0f6feba784e1087b905ad097d2d9ac0aaf744a5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 Apr 2015 08:50:14 +0200 Subject: x86/asm, x86/acpi/wakeup_64.S: Make global label a local one Make it a local symbol so that it doesn't appear in objdump output. No functionality change - code remains the same, just the global label disappears: ffffffff81039dbe: bf 03 00 00 00 mov $0x3,%edi ffffffff81039dc3: 31 c0 xor %eax,%eax ffffffff81039dc5: e8 b6 fd ff ff callq ffffffff81039b80 -ffffffff81039dca: eb 00 jmp ffffffff81039dcc - -ffffffff81039dcc : +ffffffff81039dca: eb 00 jmp ffffffff81039dcc ffffffff81039dcc: 48 c7 c0 80 1a ca 82 mov $0xffffffff82ca1a80,%rax ffffffff81039dd3: 48 8b 98 e2 00 00 00 mov 0xe2(%rax),%rbx ffffffff81039dda: 0f 22 e3 mov %rbx,%cr4 Signed-off-by: Borislav Petkov Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Len Brown Cc: Linus Torvalds Cc: Pavel Machek Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429080614-22610-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/wakeup_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index ae693b51ed8e..8c35df468104 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel) pushfq popq pt_regs_flags(%rax) - movq $resume_point, saved_rip(%rip) + movq $.Lresume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel) xorl %eax, %eax call x86_acpi_enter_sleep_state /* in case something went wrong, restore the machine status and go on */ - jmp resume_point + jmp .Lresume_point .align 4 -resume_point: +.Lresume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx -- cgit v1.2.3 From 130005231c9f2090b1b177e2cca9841b562c1784 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 15 Apr 2015 10:24:54 +0800 Subject: kvm: mmu: don't do memslot overflow check As Andres pointed out: | I don't understand the value of this check here. Are we looking for a | broken memslot? Shouldn't this be a BUG_ON? Is this the place to care | about these things? npages is capped to KVM_MEM_MAX_NR_PAGES, i.e. | 2^31. A 64 bit overflow would be caused by a gigantic gfn_start which | would be trouble in many other ways. This patch drops the memslot overflow check to make the codes more simple. Reviewed-by: Andres Lagar-Cavilla Signed-off-by: Wanpeng Li Message-Id: <1429064694-3072-1-git-send-email-wanpeng.li@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 146f295ee322..07bb22157338 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4504,19 +4504,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, bool flush = false; unsigned long *rmapp; unsigned long last_index, index; - gfn_t gfn_start, gfn_end; spin_lock(&kvm->mmu_lock); - gfn_start = memslot->base_gfn; - gfn_end = memslot->base_gfn + memslot->npages - 1; - - if (gfn_start >= gfn_end) - goto out; - rmapp = memslot->arch.rmap[0]; - last_index = gfn_to_index(gfn_end, memslot->base_gfn, - PT_PAGE_TABLE_LEVEL); + last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1, + memslot->base_gfn, PT_PAGE_TABLE_LEVEL); for (index = 0; index <= last_index; ++index, ++rmapp) { if (*rmapp) @@ -4534,7 +4527,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, if (flush) kvm_flush_remote_tlbs(kvm); -out: spin_unlock(&kvm->mmu_lock); } -- cgit v1.2.3 From decf63336e356423300b935afbebeca1fcb15184 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 14 Apr 2015 12:04:10 +0800 Subject: KVM: MMU: fix comment in kvm_mmu_zap_collapsible_spte Soft mmu uses direct shadow page to fill guest large mapping with small pages if huge mapping is disallowed on host. So zapping direct shadow page works well both for soft mmu and hard mmu, it's just less widely applicable. Fix the comment to reflect this. Signed-off-by: Xiao Guangrong Message-Id: <552C91BA.1010703@linux.intel.com> [Fix comment wording further. 
- Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 07bb22157338..d43867c33bc4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, pfn = spte_to_pfn(*sptep); /* - * Only EPT supported for now; otherwise, one would need to - * find out efficiently whether the guest page tables are - * also using huge pages. + * We cannot do huge page mapping for indirect shadow pages, + * which are found on the last rmap (level = 1) when not using + * tdp; such shadow pages are synced with the page table in + * the guest, and the guest page table is using 4K page size + * mapping if the indirect sp has level = 1. */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && -- cgit v1.2.3 From bb668734c4c960c8f61f017585b323b97e5f47b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:26:21 +0000 Subject: VFS: assorted d_backing_inode() annotations Signed-off-by: David Howells Signed-off-by: Al Viro --- arch/x86/kvm/assigned-dev.c | 2 +- drivers/mtd/ubi/build.c | 6 +++--- drivers/mtd/ubi/kapi.c | 2 +- fs/block_dev.c | 2 +- fs/posix_acl.c | 8 ++++---- fs/stat.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 6eb5c20ee373..d090ecf08809 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev) if (r) return r; - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); path_put(&path); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index ba01a8d22d28..5f58635b8360 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1164,9 +1164,9 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) return ERR_PTR(err); /* MTD device number is defined by the major / minor numbers */ - major = imajor(path.dentry->d_inode); - minor = iminor(path.dentry->d_inode); - mode = path.dentry->d_inode->i_mode; + major = imajor(d_backing_inode(path.dentry)); + minor = iminor(d_backing_inode(path.dentry)); + mode = d_backing_inode(path.dentry)->i_mode; path_put(&path); if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) return ERR_PTR(-EINVAL); diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 478e00cf2d9e..e844887732fb 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) if (error) return ERR_PTR(error); - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); mod = inode->i_mode; ubi_num = ubi_major2num(imajor(inode)); vol_id = iminor(inode) - 1; diff --git a/fs/block_dev.c b/fs/block_dev.c index 897ee0503932..79b4fa3b391d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1716,7 +1716,7 @@ struct block_device *lookup_bdev(const char *pathname) if (error) return ERR_PTR(error); - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3a48bb789c9f..84bb65b83570 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -774,12 +774,12 @@ posix_acl_xattr_get(struct dentry *dentry, const char *name, struct 
posix_acl *acl; int error; - if (!IS_POSIXACL(dentry->d_inode)) + if (!IS_POSIXACL(d_backing_inode(dentry))) return -EOPNOTSUPP; if (d_is_symlink(dentry)) return -EOPNOTSUPP; - acl = get_acl(dentry->d_inode, type); + acl = get_acl(d_backing_inode(dentry), type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -795,7 +795,7 @@ static int posix_acl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags, int type) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_backing_inode(dentry); struct posix_acl *acl = NULL; int ret; @@ -834,7 +834,7 @@ posix_acl_xattr_list(struct dentry *dentry, char *list, size_t list_size, const char *xname; size_t size; - if (!IS_POSIXACL(dentry->d_inode)) + if (!IS_POSIXACL(d_backing_inode(dentry))) return -EOPNOTSUPP; if (d_is_symlink(dentry)) return -EOPNOTSUPP; diff --git a/fs/stat.c b/fs/stat.c index 19636af5e75c..cccc1aab9a8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(generic_fillattr); */ int vfs_getattr_nosec(struct path *path, struct kstat *stat) { - struct inode *inode = path->dentry->d_inode; + struct inode *inode = d_backing_inode(path->dentry); if (inode->i_op->getattr) return inode->i_op->getattr(path->mnt, path->dentry, stat); @@ -326,7 +326,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, retry: error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty); if (!error) { - struct inode *inode = path.dentry->d_inode; + struct inode *inode = d_backing_inode(path.dentry); error = empty ? -ENOENT : -EINVAL; if (inode->i_op->readlink) { -- cgit v1.2.3 From 6a907738ab9840ca3d71c22cd28fba4cbae7f7ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Apr 2015 10:51:26 +0100 Subject: x86/asm: Enable fast 32-bit put_user_64() for copy_to_user() For fixed sized copies, copy_to_user() will utilize __put_user_size() fastpaths. However, it is missing the translation for 64-bit copies on x86/32. Testing on a Pinetrail Atom, the 64 bit put_user() fastpath is substantially faster than the generic copy_to_user() fallback. Signed-off-by: Chris Wilson Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429091486-11443-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..0ed5504c6060 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; + case 8: + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + return ret; } } return __copy_to_user_ll(to, from, n); -- cgit v1.2.3 From 98b228f55014870092c15d7d168fecac69f2f12a Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Wed, 15 Apr 2015 16:32:24 -0700 Subject: x86/efi: Store upper bits of command line buffer address in ext_cmd_line_ptr Until now, the EFI stub was only setting the 32 bit cmd_line_ptr in the setup_header structure, so on 64 bit platforms this could be truncated. This patch adds setting the upper bits of the buffer address in ext_cmd_line_ptr. This case was likely never hit, as the allocation for this buffer is done at the lowest available address. 
Only x86_64 kernels have this problem, as the 1-1 mapping mandated by EFI ensures that all memory is 32 bit addressable on 32 bit platforms. The EFI stub does not support mixed mode, so the 32 bit kernel on 64 bit firmware case does not need to be handled. Signed-off-by: Roy Franz Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 92b9a5f2aed6..5999980206bf 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1110,6 +1110,8 @@ struct boot_params *make_boot_params(struct efi_config *c) if (!cmdline_ptr) goto fail; hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; + /* Fill in upper bits of command line address, NOP on 32 bit */ + boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; hdr->ramdisk_image = 0; hdr->ramdisk_size = 0; -- cgit v1.2.3 From 94d4b4765b7ddb8478b0d57663cf7a08e2263bbf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 23 Nov 2012 19:19:07 +0100 Subject: x86/mm: Clean up types in xlate_dev_mem_ptr() Pavel Machek reported the following compiler warning on x86/32 CONFIG_HIGHMEM64G=y builds: arch/x86/mm/ioremap.c:344:10: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Clean up the types in this function by using a single natural type for internal calculations (unsigned long), to make it more apparent what's happening, and also to remove fragile casts. Reported-by: Pavel Machek Cc: jgross@suse.com Cc: roland@purestorage.com Link: http://lkml.kernel.org/r/20150416080440.GA507@amd Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..4bf037b20f47 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -332,18 +332,20 @@ EXPORT_SYMBOL(iounmap); */ void *xlate_dev_mem_ptr(phys_addr_t phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; + unsigned long start = phys & PAGE_MASK; + unsigned long offset = phys & ~PAGE_MASK; + unsigned long vaddr; /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE); + /* Only add the offset on success and return NULL if the ioremap() failed: */ + if (vaddr) + vaddr += offset; - return addr; + return (void *)vaddr; } void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) -- cgit v1.2.3 From 085e68eeafbf76e21848ad5bafaecec88a11dd64 Mon Sep 17 00:00:00 2001 From: Ben Serebrin Date: Thu, 16 Apr 2015 11:58:05 -0700 Subject: KVM: VMX: Preserve host CR4.MCE value while in guest mode. The host's decision to enable machine check exceptions should remain in force during non-root mode. KVM was writing 0 to cr4 on VCPU reset and passed a slightly-modified 0 to the vmcs.guest_cr4 value. Tested: Built. On earlier version, tested by injecting machine check while a guest is spinning. Before the change, if guest CR4.MCE==0, then the machine check is escalated to Catastrophic Error (CATERR) and the machine dies. If guest CR4.MCE==1, then the machine check causes VMEXIT and is handled normally by host Linux. 
After the change, injecting a machine check causes normal Linux machine check handling. Signed-off-by: Ben Serebrin Reviewed-by: Venkatesh Srinivas Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f5e8dce8046c..f7b61687bd79 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + /* + * Pass through host's Machine Check Enable value to hw_cr4, which + * is in force while we are in guest mode. Do not let guests control + * this bit, even if host CR4.MCE == 0. + */ + unsigned long hw_cr4 = + (cr4_read_shadow() & X86_CR4_MCE) | + (cr4 & ~X86_CR4_MCE) | + (to_vmx(vcpu)->rmode.vm86_active ? + KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); if (cr4 & X86_CR4_VMXE) { /* -- cgit v1.2.3 From aac82d319148c6a84e1bf90b86d3e0ec8bf0ee38 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Apr 2015 15:51:54 -0700 Subject: x86, paravirt, xen: Remove the 64-bit ->irq_enable_sysexit() pvop We don't use irq_enable_sysexit on 64-bit kernels any more. Remove all the paravirt and Xen machinery to support it on 64-bit kernels. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8a03355698fe5b94194e9e7360f19f91c1b2cf1f.1428100853.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 6 ------ arch/x86/include/asm/paravirt_types.h | 7 ++++--- arch/x86/kernel/asm-offsets.c | 2 ++ arch/x86/kernel/paravirt.c | 4 +++- arch/x86/kernel/paravirt_patch_64.c | 1 - arch/x86/xen/enlighten.c | 3 ++- arch/x86/xen/xen-asm_64.S | 16 ---------------- arch/x86/xen/xen-ops.h | 2 ++ 8 files changed, 13 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..3cdb9eafbf8c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32) swapgs sysretl ENDPROC(native_usergs_sysret32) - -ENTRY(native_irq_enable_sysexit) - swapgs - sti - sysexit -ENDPROC(native_irq_enable_sysexit) #endif /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..38a0ff9ef06e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -160,13 +160,14 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); +#ifdef CONFIG_X86_32 /* * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) + * is only used in 32-bit kernels. 64-bit kernels use + * usergs_sysret32 instead. 
*/ void (*irq_enable_sysexit)(void); +#endif /* * Switch to usermode gs and return to 64-bit usermode using diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index b27f6ec90caa..8e3d22a1af94 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -68,7 +68,9 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); +#ifdef CONFIG_X86_32 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); +#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..7563114d9c3a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || +#ifdef CONFIG_X86_32 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || +#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_32) .irq_enable_sysexit = native_irq_enable_sysexit, #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..0de21c62c348 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 81665c9f2132..3797b6b31f95 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1267,10 +1267,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tscp = native_read_tscp, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 985fc3ee0973..a2cabb8bd6bf 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -47,22 +47,6 @@ ENTRY(xen_iret) ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. 
- */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - ENTRY(xen_sysret64) /* * We're already on the usermode stack at this point, but diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..c20fe29e65f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); +#ifdef CONFIG_X86_32 __visible void xen_sysexit(void); +#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); -- cgit v1.2.3 From 3462bd2adeadc49d9e126bca3b5536a3437a902d Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Mon, 20 Apr 2015 23:27:11 +0200 Subject: x86/asm: Always inline atomics During some code analysis I realized that atomic_add(), atomic_sub() and friends are not necessarily inlined AND that each function is defined multiple times: atomic_inc: 544 duplicates atomic_dec: 215 duplicates atomic_dec_and_test: 107 duplicates atomic64_inc: 38 duplicates [...] Each definition is exactly identical, e.g.: ffffffff813171b8 : 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 01 3e lock add %edi,(%rsi) 5d pop %rbp c3 retq In turn, each definition has one or more callsites: ffffffff81317c78: e8 3b f5 ff ff callq ffffffff813171b8 [...] ffffffff8131a062: e8 51 d1 ff ff callq ffffffff813171b8 [...] ffffffff8131a190: e8 23 d0 ff ff callq ffffffff813171b8 [...] The other way around would be to remove the static linkage - but I prefer enforced inlining here. Before: text data bss dec hex filename 81467393 19874720 20168704 121510817 73e1ba1 vmlinux.orig After: text data bss dec hex filename 81461323 19874720 20168704 121504747 73e03eb vmlinux.inlined Yes, the inlining here makes the kernel even smaller! ;) Linus further observed: "I have this memory of having seen that before - the size heuristics for gcc getting confused by inlining. [...] It might be a good idea to mark things that are basically just wrappers around a single (or a couple of) asm instruction to be always_inline." Signed-off-by: Hagen Paul Pfeifer Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429565231-4609-1-git-send-email-hagen@jauu.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 16 ++++++++-------- arch/x86/include/asm/atomic64_64.h | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e5cd123fdfb..75a9ee8529f3 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i) * * Atomically adds @i to @v. */ -static inline void atomic_add(int i, atomic_t *v) +static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v) * * Atomically subtracts @i from @v.
*/ -static inline void atomic_sub(int i, atomic_t *v) +static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -static inline void atomic_inc(atomic_t *v) +static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); @@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic_dec(atomic_t *v) +static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); @@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic_dec_and_test(atomic_t *v) +static __always_inline int atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } @@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index f8d273e18516..b965f9e03f2a 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. 
*/ -static inline void atomic64_dec(atomic64_t *v) +static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -- cgit v1.2.3 From 3b6e042188994466ec257b71296b5f85b894dcd9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Apr 2015 17:26:23 +0200 Subject: perf/x86/intel: Add cpu_(prepare|starting|dying) for core_pmu The core_pmu does not define the cpu_* callbacks, which handle allocation of 'struct cpu_hw_events::shared_regs' data, initialization of the debug store and the PMU_FL_EXCL_CNTRS counters. While this probably won't happen on bare metal, a virtual CPU can define x86_pmu.extra_regs together with PMU version 1 and thus end up using core_pmu, and its shared_regs data, without that data ever being allocated. That could lead to the following panic: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] _spin_lock_irqsave+0x1f/0x40 SNIP [] __intel_shared_reg_get_constraints+0x69/0x1e0 [] intel_get_event_constraints+0x9b/0x180 [] x86_schedule_events+0x75/0x1d0 [] ? check_preempt_curr+0x7c/0x90 [] ? try_to_wake_up+0x24e/0x3e0 [] ? default_wake_function+0x12/0x20 [] ? autoremove_wake_function+0x16/0x40 [] ? __wake_up_common+0x59/0x90 [] ? __d_lookup+0xa7/0x150 [] ? do_lookup+0x9f/0x230 [] ? dput+0x9a/0x150 [] ? path_to_nameidata+0x25/0x60 [] ? __link_path_walk+0x7da/0x1000 [] ? x86_pmu_add+0xb9/0x170 [] x86_pmu_commit_txn+0x67/0xc0 [] ? mntput_no_expire+0x30/0x110 [] ? path_put+0x31/0x40 [] ? current_fs_time+0x27/0x30 [] ? mem_cgroup_get_reclaim_stat_from_page+0x20/0x70 [] group_sched_in+0x13a/0x170 [] ? sched_clock+0x9/0x10 [] ctx_sched_in+0x2e8/0x330 [] perf_event_sched_in+0x6b/0xb0 [] perf_event_context_sched_in+0x76/0xc0 [] perf_event_comm+0x1bb/0x2e0 [] set_task_comm+0x69/0x80 [] setup_new_exec+0xe1/0x2e0 [] load_elf_binary+0x3ce/0x1ab0 Add the cpu_(prepare|starting|dying) callbacks to core_pmu so that its shared_regs data gets allocated. AFAICS there's no harm in initializing the debug store and PMU_FL_EXCL_CNTRS for core_pmu either.
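The underlying contract is that any per-CPU state the event-scheduling fast path dereferences must be set up by the PMU's CPU lifecycle callbacks. A hedged standalone sketch of that shape, with illustrative names rather than the exact kernel API:

#include <stdio.h>
#include <stdlib.h>

struct shared_regs { int core_id; };	/* stand-in for per-CPU state */

struct pmu_callbacks {
	struct shared_regs *(*cpu_prepare)(int cpu);
};

static struct shared_regs *core_cpu_prepare(int cpu)
{
	struct shared_regs *regs = calloc(1, sizeof(*regs));

	if (regs)
		regs->core_id = cpu;
	return regs;
}

static void schedule_event(struct shared_regs *regs)
{
	/* Like __intel_shared_reg_get_constraints(), this dereferences
	 * the per-CPU state unconditionally. */
	printf("constraints for core %d\n", regs->core_id);
}

int main(void)
{
	struct pmu_callbacks core_pmu = { .cpu_prepare = core_cpu_prepare };

	/* With no cpu_prepare wired up, regs would stay NULL and
	 * schedule_event() would oops exactly as in the trace above. */
	struct shared_regs *regs = core_pmu.cpu_prepare(0);

	if (!regs)
		return 1;
	schedule_event(regs);
	free(regs);
	return 0;
}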
Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20150421152623.GC13169@krava.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 66 +++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 219d3fb423a1..960e85de13fb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -static __initconst const struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, - .format_attrs = intel_arch_formats_attr, - .events_sysfs_show = intel_event_sysfs_show, -}; - struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; @@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static __initconst const struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32-bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL<<31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + /* + * Virtual (or funny metal) CPU can define x86_pmu.extra_regs + * together with PMU version 1 and thus be using core_pmu with + * shared_regs. We need following callbacks here to allocate + * it properly. 
+ */ + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, -- cgit v1.2.3 From 80bcffb376a6890dd7452b12c1ba032f8f24fef6 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Mon, 20 Apr 2015 15:34:07 -0700 Subject: perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao Cc: Arnaldo Carvalho de Melo Cc: Bjorn Helgaas Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015b755c..ca75e70865ef 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- cgit v1.2.3 From 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Tue, 21 Apr 2015 12:33:11 -0700 Subject: perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. 
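Both uncore patches above revolve around table-driven PCI matching: the driver declares the device IDs it owns and the probe path walks the table. A hypothetical, self-contained C sketch of that lookup, reusing the HSW ULT ID from the patch (the struct layout and helper names are simplified stand-ins for 'struct pci_device_id' and the PCI core's matching, not the kernel's actual code):

#include <stdio.h>

#define PCI_VENDOR_ID_INTEL		0x8086
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04	/* from the patch above */

/* Simplified stand-in for 'struct pci_device_id'. */
struct pci_id {
	unsigned short vendor, device;
};

static const struct pci_id imc_ids[] = {
	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC },
	{ 0, 0 },	/* end: all zeroes, as in hsw_uncore_pci_ids[] */
};

static int imc_match(unsigned short vendor, unsigned short device)
{
	const struct pci_id *id;

	for (id = imc_ids; id->vendor; id++)
		if (id->vendor == vendor && id->device == device)
			return 1;
	return 0;
}

int main(void)
{
	printf("0a04 matches: %d\n", imc_match(0x8086, 0x0a04));	/* 1 */
	printf("1234 matches: %d\n", imc_match(0x8086, 0x1234));	/* 0 */
	return 0;
}

Keeping the #defines next to the table that uses them, which is what this patch does, means a new IMC variant touches one file only.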
Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +++++- include/linux/pci_ids.h | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70865ef..4562e9e22c60 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a93f6b..a59385852233 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- cgit v1.2.3 From 17be0aec74fb036eb4eb32c2268f3420a034762b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:27:29 +0200 Subject: x86/asm/entry/64: Implement better check for canonical addresses This change makes the check exact (no more false positives on "negative" addresses). Andy explains: "Canonical addresses either start with 17 zeros or 17 ones. In the old code, we checked that the top (64-47) = 17 bits were all zero. We did this by shifting right by 47 bits and making sure that nothing was left. In the new code, we're shifting left by (64 - 48) = 16 bits and then signed shifting right by the same amount, thus propagating the 17th highest bit to all positions to its left. If we get the same value we started with, then we're good to go." While it isn't really important to be fully correct here (almost all addresses we'll ever see will be userspace ones), the new check looks to be cheap enough: it uses two more ALU ops but preserves %rcx, allowing us not to reload it from pt_regs->cx again. At the disassembly level, the changes are: cmp %rcx,0x80(%rsp) -> mov 0x80(%rsp),%r11; cmp %rcx,%r11 shr $0x2f,%rcx -> shl $0x10,%rcx; sar $0x10,%rcx; cmp %rcx,%r11 mov 0x58(%rsp),%rcx -> (eliminated) Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429633649-20169-1-git-send-email-dvlasenk@redhat.com [ Changelog massage.
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c7b238494b31..3c78a15a537d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -410,26 +410,27 @@ syscall_return: * a completely clean 64-bit userspace context. */ movq RCX(%rsp),%rcx - cmpq %rcx,RIP(%rsp) /* RCX == RIP */ + movq RIP(%rsp),%r11 + cmpq %rcx,%r11 /* RCX == RIP */ jne opportunistic_sysret_failed /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. It's not worth - * testing for canonicalness exactly -- this check detects any - * of the 17 high bits set, which is true for non-canonical - * or kernel addresses. (This will pessimize vsyscall=native. - * Big deal.) + * the kernel, since userspace controls RSP. * - * If virtual addresses ever become wider, this will need + * If width of "canonical tail" ever becomes variable, this will need * to be updated to remain correct on both old and new CPUs. */ .ifne __VIRTUAL_MASK_SHIFT - 47 .error "virtual address width changed -- SYSRET checks need update" .endif - shr $__VIRTUAL_MASK_SHIFT, %rcx - jnz opportunistic_sysret_failed + /* Change top 16 bits to be the sign-extension of 47th bit */ + shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 + jne opportunistic_sysret_failed cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ jne opportunistic_sysret_failed @@ -466,8 +467,8 @@ syscall_return: */ syscall_return_via_sysret: CFI_REMEMBER_STATE - /* r11 is already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_R11 + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 CFI_RESTORE_STATE -- cgit v1.2.3 From ac7f5dfb0348a33b2ea92a0c477103c4db45ad4e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:03:13 +0200 Subject: x86/asm/entry/64: Merge 32-bit execve stubs with x32 ones, as they are identical Run-tested. Suggested-by: Brian Gerst Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429632194-13445-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3c78a15a537d..e952f6bf1d6d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -525,40 +525,27 @@ GLOBAL(stub_execveat) CFI_ENDPROC END(stub_execveat) -#ifdef CONFIG_X86_X32_ABI +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) +GLOBAL(stub32_execve) CFI_STARTPROC DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve CFI_ENDPROC +END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 - call compat_sys_execveat - jmp return_from_execve - CFI_ENDPROC -END(stub_x32_execveat) -#endif - -#ifdef CONFIG_IA32_EMULATION - .align 8 -GLOBAL(stub32_execve) - CFI_STARTPROC - call compat_sys_execve - jmp return_from_execve - CFI_ENDPROC -END(stub32_execve) - .align 8 GLOBAL(stub32_execveat) CFI_STARTPROC + DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve CFI_ENDPROC END(stub32_execveat) +END(stub_x32_execveat) #endif /* -- cgit v1.2.3 From 3f5159a9221f19b08275b0a6388ab14392ae4eec Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:03:14 +0200 Subject: x86/asm/entry/32: Update -ENOSYS handling to match the 64-bit logic Recently Andy changed the 64-bit syscall logic so that pt_regs->ax is initially set to -ENOSYS, and on syscall exit, it is updated with the actual return value. This simplified the logic there. This patch does the same for 32-bit syscall entry points. The check for %rax being too big is moved to be just before the call instruction which dispatches execution through the syscall table. There is no way to accidentally skip this check now by jumping to a label after it. This allows us to remove redundant checks after ptrace et al. If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax) instruction pair. pt_regs->ax remains set to -ENOSYS, and it gets returned to userspace. Similar to 64-bit code, this eliminates the "ia32_badsys" code path. Run-tested. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Will Drewry Link: http://lkml.kernel.org/r/1429632194-13445-2-git-send-email-dvlasenk@redhat.com [ Changelog massage. 
] Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 3cdb9eafbf8c..56fd6dd2e342 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -136,7 +136,7 @@ ENTRY(ia32_sysenter_target) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -163,8 +163,6 @@ sysenter_flags_fixed: testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -173,8 +171,11 @@ sysenter_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ sysenter_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -241,9 +242,7 @@ sysexit_from_sys_call: movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %eax,%edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl RAX(%rsp),%eax /* reload syscall number */ - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + movl ORIG_RAX(%rsp),%eax /* reload syscall number */ movl %ebx,%edi /* reload 1st syscall arg */ movl RCX(%rsp),%esi /* reload 2nd syscall arg */ movl RDX(%rsp),%edx /* reload 3rd syscall arg */ @@ -294,13 +293,10 @@ sysenter_tracesys: #endif SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -370,7 +366,7 @@ ENTRY(ia32_cstar_target) pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -386,8 +382,6 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpq $IA32_NR_syscalls-1,%rax - ja ia32_badsys cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -396,8 +390,11 @@ cstar_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ cstar_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -444,14 +441,11 @@ cstar_tracesys: xchgl %r9d,%ebp SAVE_EXTRA_REGS CLEAR_RREGS r9 - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS xchgl %ebp,%r9d - cmpq 
$(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -510,7 +504,7 @@ ENTRY(ia32_syscall) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -518,8 +512,6 @@ ENTRY(ia32_syscall) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys ia32_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -527,9 +519,12 @@ ia32_do_call: xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX(%rsp) +1: ia32_ret_from_sys_call: CLEAR_RREGS jmp int_ret_from_sys_call @@ -537,23 +532,14 @@ ia32_ret_from_sys_call: ia32_tracesys: SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call + CFI_ENDPROC END(ia32_syscall) -ia32_badsys: - movq $0,ORIG_RAX(%rsp) - movq $-ENOSYS,%rax - jmp ia32_sysret - - CFI_ENDPROC - .macro PTREGSCALL label, func ALIGN GLOBAL(\label) -- cgit v1.2.3 From 00425bb181c204c8f250fec122e2817a930e0286 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 24 Apr 2015 08:37:09 +0200 Subject: crypto: x86/sha512_ssse3 - fixup for asm function prototype change Patch e68410ebf626 ("crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer") changed the prototypes of the core asm SHA-512 implementations so that they are compatible with the prototype used by the base layer. However, in one instance, the register that was used for passing the input buffer was reused as a scratch register later on in the code, and since the input buffer param changed places with the digest param (which needs to be written back before the function returns), this resulted in the scratch register being dereferenced in a memory write operation, causing a GPF. Fix this by changing the scratch register to use the same register as the input buffer param again.
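The faulting pattern is easier to see in C than in asm: the output pointer was still live (the digest write-back needs it), but its register was reused as scratch. A hypothetical analogy, not the actual SHA-512 code; only transform_fixed() is safe to run, and note the real fix reuses the input-buffer register (%rsi), which is no longer live at that point - here a separate variable plays that role:

#include <stdint.h>
#include <stdio.h>

/* Buggy shape (shown, never called): 'digest' is reused as scratch,
 * so the final write-back dereferences a junk pointer - the GPF from
 * the report. */
static void transform_buggy(uint64_t *digest, const uint64_t *in)
{
	uint64_t state = digest[0] ^ in[0];

	digest = (uint64_t *)(uintptr_t)(state & 0xff);	/* clobbered */
	digest[0] = state;				/* faults */
}

/* Fixed shape: scratch no longer aliases the still-live output pointer. */
static void transform_fixed(uint64_t *digest, const uint64_t *in)
{
	uint64_t state = digest[0] ^ in[0];
	uint64_t scratch = state & 0xff;	/* separate scratch */

	digest[0] = state + scratch;		/* write-back stays valid */
}

int main(void)
{
	uint64_t digest[1] = { 0x123 }, in[1] = { 0x456 };

	(void)transform_buggy;			/* intentionally not executed */
	transform_fixed(digest, in);
	printf("digest[0] = 0x%llx\n", (unsigned long long)digest[0]);
	return 0;
}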
Fixes: e68410ebf626 ("crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer") Reported-by: Bobby Powers Tested-by: Bobby Powers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-avx2-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index a4771dcd1fcf..1f20b35d8573 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -79,7 +79,7 @@ NUM_BLKS = %rdx c = %rcx d = %r8 e = %rdx -y3 = %rdi +y3 = %rsi TBL = %rbp -- cgit v1.2.3 From d869844bd081081bf537e806a44811884230643e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Apr 2015 08:33:59 -0700 Subject: x86: fix special __probe_kernel_write() tail zeroing case Commit cae2a173fe94 ("x86: clean up/fix 'copy_in_user()' tail zeroing") fixed the failure case tail zeroing of one special case of the x86-64 generic user-copy routine, namely when used for the user-to-user case ("copy_in_user()"). But in the process it broke an even more unusual case: using the user copy routine for kernel-to-kernel copying. Now, normally kernel-kernel copies are obviously done using memcpy(), but we have a couple of special cases when we use the user-copy functions. One is when we pass a kernel buffer to a regular user-buffer routine, using set_fs(KERNEL_DS). That's a "normal" case, and continued to work fine, because it never takes any faults (with the possible exception of a silent and successful vmalloc fault). But Jan Beulich pointed out another, very unusual, special case: when we use the user-copy routines not because it's a path that expects a user pointer, but for a couple of ftrace/kgdb cases that want to do a kernel copy, but do so using "unsafe" buffers, and use the user-copy routine to gracefully handle faults. IOW, for probe_kernel_write(). And that broke for the case of a faulting kernel destination, because we saw the kernel destination and wanted to try to clear the tail of the buffer. Which doesn't work, since that's what faults. This only triggers for things like kgdb and ftrace users (e.g. trying to set a breakpoint on read-only memory), but it's definitely a bug. The fix is to not compare against the kernel address start (TASK_SIZE), but instead use the same limits "access_ok()" uses. Reported-and-tested-by: Jan Beulich Cc: stable@vger.kernel.org # 4.0 Signed-off-by: Linus Torvalds --- arch/x86/lib/usercopy_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 1f33b3d1fd68..0a42327a59d7 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len) clac(); /* If the destination is a kernel buffer, we always clear the end */ - if ((unsigned long)to >= TASK_SIZE_MAX) + if (!__addr_ok(to)) memset(to, 0, len); return len; } -- cgit v1.2.3 From 61f01dd941ba9e06d2bf05994450ecc3d61b6b8b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 26 Apr 2015 16:47:59 -0700 Subject: x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue AMD CPUs don't reinitialize the SS descriptor on SYSRET, so SYSRET with SS == 0 results in an invalid usermode state in which SS is apparently equal to __USER_DS but causes #SS if used. Work around the issue by setting SS to __KERNEL_DS in __switch_to, thus ensuring that SYSRET never happens with SS set to NULL.
This was exposed by a recent vDSO cleanup. Fixes: e7d6eefaaa44 ("x86/vdso32/syscall.S: Do not load __USER32_DS to %ss") Signed-off-by: Andy Lutomirski Cc: Peter Anvin Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Brian Gerst Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32entry.S | 7 +++++++ arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/cpu/amd.c | 3 +++ arch/x86/kernel/entry_64.S | 9 +++++++++ arch/x86/kernel/process_64.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..72bf2680f819 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -427,6 +427,13 @@ sysretl_from_sys_call: * cs and ss are loaded from MSRs. * (Note: 32bit->32bit SYSRET is different: since r11 * does not exist, it merely sets eflags.IF=1). + * + * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss + * descriptor is not reinitialized. This means that we must + * avoid SYSRET with SS == NULL, which could happen if we schedule, + * exit the kernel, and re-enter using an interrupt vector. (All + * interrupt entries on x86_64 set SS to NULL.) We prevent that + * from happening by reloading SS in __switch_to. */ USERGS_SYSRET32 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ee9b94d9921..3d6606fb97d0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -265,6 +265,7 @@ #define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fd470ebf924e..e4cf63301ff4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + + /* AMD CPUs don't reset SS attributes on SYSRET */ + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c7b238494b31..02c2eff7478d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,15 @@ system_call_fastpath: * rflags from r11 (but RF and VM bits are forced to 0), * cs and ss are loaded from MSRs. * Restoration of rflags re-enables interrupts. + * + * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss + * descriptor is not reinitialized. This means that we should + * avoid SYSRET with SS == NULL, which could happen if we schedule, + * exit the kernel, and re-enter using an interrupt vector. (All + * interrupt entries on x86_64 set SS to NULL.) We prevent that + * from happening by reloading SS in __switch_to. (Actually + * detecting the failure in 64-bit userspace is tricky but can be + * done.)
*/ USERGS_SYSRET64 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4baaa972f52a..ddfdbf74f174 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); + if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { + /* + * AMD CPUs have a misfeature: SYSRET sets the SS selector but + * does not update the cached descriptor. As a result, if we + * do SYSRET while SS is NULL, we'll end up in user mode with + * SS apparently equal to __USER_DS but actually unusable. + * + * The straightforward workaround would be to fix it up just + * before SYSRET, but that would slow down the system call + * fast paths. Instead, we ensure that SS is never NULL in + * system call context. We do this by replacing NULL SS + * selectors at every context switch. SYSCALL sets up a valid + * SS, so the only way to get NULL is to re-enter the kernel + * from CPL 3 through an interrupt. Since that can't happen + * in the same task as a running syscall, we are guaranteed to + * context switch between every interrupt vector entry and a + * subsequent SYSRET. + * + * We read SS first because SS reads are much faster than + * writes. Out of caution, we force SS to __KERNEL_DS even if + * it previously had a different non-NULL value. + */ + unsigned short ss_sel; + savesegment(ss, ss_sel); + if (ss_sel != __KERNEL_DS) + loadsegment(ss, __KERNEL_DS); + } + return prev_p; } -- cgit v1.2.3 From 5dca0d9147458be9b9363b8a484aa77d710b412a Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Wed, 25 Mar 2015 12:08:14 +0100 Subject: kvm: x86: fix kvmclock update protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kvmclock spec says that the host will increment a version field to an odd number, then update stuff, then increment it to an even number. The host is buggy and doesn't do this, and the result is observable when one vcpu reads another vcpu's kvmclock data. There's no good way for a guest kernel to keep its vdso from reading a different vcpu's kvmclock data, but we don't need to care about changing VCPUs as long as we read consistent data from kvmclock. (The VCPU can change outside of this loop too, so it doesn't matter if we return a value not fit for this VCPU.) Based on a patch by Radim Krčmář. Reviewed-by: Radim Krčmář Acked-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed31c31b2485..c73efcd03e29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) &guest_hv_clock, sizeof(guest_hv_clock)))) return 0; - /* - * The interface expects us to write an even number signaling that the - * update is finished. Since the guest won't see the intermediate - * state, we just increase by 2 at the end. + /* This VCPU is paused, but it's legal for a guest to read another + * VCPU's kvmclock, so we really have to follow the specification where + * it says that version is odd if data is being modified, and even after + * it is consistent. + * + * Version field updates must be kept separate.
This is because + * kvm_write_guest_cached might use a "rep movs" instruction, and + * writes within a string instruction are weakly ordered. So there + * are three writes overall. + * + * As a small optimization, only write the version field in the first + * and third write. The vcpu->pv_time cache is still valid, because the + * version field is the first in the struct. */ - vcpu->hv_clock.version = guest_hv_clock.version + 2; + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + + vcpu->hv_clock.version = guest_hv_clock.version + 1; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); + + smp_wmb(); /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); + + smp_wmb(); + + vcpu->hv_clock.version++; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); return 0; } -- cgit v1.2.3 From 73459e2a1ada09a68c02cc5b73f3116fc8194b3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Apr 2015 13:20:18 +0200 Subject: x86: pvclock: Really remove the sched notifier for cross-cpu migrations This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition can only arise due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC.
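Both kvmclock patches hinge on the same guest-side discipline: treat the version field as a seqcount and retry the read if the version is odd (update in progress) or changed across the read (torn snapshot). A simplified, self-contained C sketch of that reader - field names follow pvclock, but this is an illustrative assumption, not the kernel's __pvclock_read_cycles():

#include <stdint.h>
#include <stdio.h>

/* Simplified pvclock-style record: 'version' is odd while the host is
 * mid-update, even once the payload is consistent. */
struct time_info {
	volatile uint32_t version;
	volatile uint64_t system_time;
};

static uint64_t read_clock(const struct time_info *ti)
{
	uint32_t v;
	uint64_t t;

	do {
		v = ti->version;
		__sync_synchronize();	/* pairs with the writer's barriers */
		t = ti->system_time;	/* snapshot the payload */
		__sync_synchronize();
	} while ((v & 1) || v != ti->version);	/* odd or changed: retry */

	return t;
}

int main(void)
{
	struct time_info ti = { .version = 2, .system_time = 123456789ULL };

	printf("time: %llu\n", (unsigned long long)read_clock(&ti));
	return 0;
}

The first patch fixes the writer's half of this contract (odd before the update, even after, with barriers between the three writes); the revert above is possible because, with synchronized TSC, this reader-side check is enough on its own.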
Reported-by: Peter Zijlstra Suggested-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 1 - arch/x86/kernel/pvclock.c | 44 ------------------------------------------ arch/x86/vdso/vclock_gettime.c | 34 ++++++++++++++------------------ include/linux/sched.h | 8 -------- kernel/sched/core.c | 15 -------------- 5 files changed, 15 insertions(+), 87 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc07d496..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 40d2473836c9..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. 
+ * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - /* Make sure migrate_count will change if we leave the VCPU. */ - do { - pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - - cpu1 = cpu; - cpu = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1)); + pvti = get_pvti(cpu); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * - We must read TSC of pvti's VCPU. - * - KVM doesn't follow the versioning protocol, so data could - * change before version if we left the VCPU. + * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. */ - smp_rmb(); - } while (unlikely((pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + cpu1 = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..26a2e6122734 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); -/* Notifier for when a task gets migrated to a new CPU */ -struct task_migration_notifier { - struct task_struct *task; - int from_cpu; - int to_cpu; -}; -extern void register_task_migration_notifier(struct notifier_block *n); - extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9123a82cbb6..fe22f7510bce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } -static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); - -void register_task_migration_notifier(struct notifier_block *n) -{ - atomic_notifier_chain_register(&task_migration_notifier, n); -} - #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { - struct task_migration_notifier tmn; - if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); - - tmn.task = p; - tmn.from_cpu = task_cpu(p); - tmn.to_cpu = new_cpu; - - atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); -- cgit v1.2.3 From 2b953a5e994ce279904ec70220f7d4f31d380a0a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Apr 2015 18:46:20 -0400 Subject: xen: Suspend ticks on all CPUs during suspend Commit 77e32c89a711 ("clockevents: Manage device's state separately for the core") decouples clockevent device's modes from states. With this change when a Xen guest tries to resume, it won't be calling its set_mode op which needs to be done on each VCPU in order to make the hypervisor aware that we are in oneshot mode. 
This happens because clockevents_tick_resume() (which is an intermediate step of resuming ticks on a processor) doesn't call clockevents_set_state() anymore and because during suspend clockevent devices on all VCPUs (except for the one doing the suspend) are left in ONESHOT state. As a result, during resume the clockevents state machine will assume that the device is already where it should be and doesn't need to be updated. To avoid this problem we should suspend ticks on all VCPUs during suspend. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/suspend.c | 10 ++++++++++ drivers/xen/manage.c | 9 ++++++--- include/xen/xen-ops.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9497698645a..53b4c0811f4f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data) tick_resume_local(); } +static void xen_vcpu_notify_suspend(void *data) +{ + tick_suspend_local(); +} + void xen_arch_resume(void) { on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } + +void xen_arch_suspend(void) +{ + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); +} diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index bf1940706422..9e6a85104a20 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -131,6 +131,8 @@ static void do_suspend(void) goto out_resume; } + xen_arch_suspend(); + si.cancelled = 1; err = stop_machine(xen_suspend, &si, cpumask_of(0)); @@ -148,11 +150,12 @@ static void do_suspend(void) si.cancelled = 1; } + xen_arch_resume(); + out_resume: - if (!si.cancelled) { - xen_arch_resume(); + if (!si.cancelled) xs_resume(); - } else + else xs_suspend_cancel(); dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c643e6a94c9a..0ce4f32017ea 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -13,6 +13,7 @@ void xen_arch_post_suspend(int suspend_cancelled); void xen_timer_resume(void); void xen_arch_resume(void); +void xen_arch_suspend(void); void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb); -- cgit v1.2.3 From 2c62e8492ed7358bbe7da51666c7e0f6da9474ee Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 30 Apr 2015 12:41:28 +0800 Subject: x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff] available on PCI bus An IO port or MMIO resource assigned to a PCI host bridge may be consumed by the host bridge itself or available to its child bus/devices. The ACPI specification defines a bit (Producer/Consumer) to tell whether the resource is consumed by the host bridge itself, but firmware hasn't used that bit consistently, so we can't rely on it. Before commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation"), arch/x86/pci/acpi.c ignored all IO port resources defined by acpi_resource_io and acpi_resource_fixed_io to filter out IO ports consumed by the host bridge itself. Commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation") started accepting all IO port and MMIO resources, which caused a regression: IO port resources consumed by the host bridge itself became available to its child devices.
Then commit 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by host bridge itself") ignored resources consumed by the host bridge itself by checking the IORESOURCE_WINDOW flag, which accidentally removed MMIO resources defined by acpi_resource_memory24, acpi_resource_memory32 and acpi_resource_fixed_memory32. On x86 and IA64 platforms, all IO port and MMIO resources are assumed to be available to child bus/devices except one special case: IO port [0xCF8-0xCFF] is consumed by the host bridge itself to access PCI configuration space. So explicitly filter out PCI CFG IO ports [0xCF8-0xCFF]. This solution will also ease the way to consolidate ACPI PCI host bridge common code from x86, ia64 and ARM64. Related ACPI tables are archived at: https://bugzilla.kernel.org/show_bug.cgi?id=94221 Related discussions at: http://patchwork.ozlabs.org/patch/461633/ https://lkml.org/lkml/2015/3/29/304 Fixes: 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by host bridge itself") Reported-by: Bernhard Thaler Signed-off-by: Jiang Liu Cc: 4.0+ # 4.0+ Reviewed-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- arch/x86/pci/acpi.c | 24 ++++++++++++++++++++++-- drivers/acpi/resource.c | 2 +- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index e4695985f9de..d93963340c3c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) kfree(info); } +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + static void probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, int busnum, int domain, @@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info, "no IO and memory resources present in _CRS\n") else resource_list_for_each_entry_safe(entry, tmp, list) { - if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || - (entry->res->flags & IORESOURCE_DISABLED)) + if ((entry->res->flags & IORESOURCE_DISABLED) || + resource_is_pcicfg_ioport(entry->res)) resource_list_destroy_entry(entry); else entry->res->name = info->name; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 5589a6e2a023..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_resources); * @ares: Input ACPI resource object. * @types: Valid resource types of IORESOURCE_XXX * - * This is a hepler function to support acpi_dev_get_resources(), which filters + * This is a helper function to support acpi_dev_get_resources(), which filters * ACPI resource objects according to resource types.
*/ int acpi_dev_filter_resource_type(struct acpi_resource *ares, -- cgit v1.2.3 From e8a4a2696fecb398b0288c43c0e0dbb91e265bb2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 4 May 2015 21:15:31 -0700 Subject: x86/spinlocks: Fix regression in spinlock contention detection A spinlock is regarded as contended when there is at least one waiter. Currently, the code that checks whether there are any waiters relies on the tail value being greater than the head. However, this is not true if tail reaches the max value and wraps back to zero, so arch_spin_is_contended() incorrectly returns 0 (not contended) when tail is smaller than head. The original code (before the regression) handled this case by casting the (tail - head) difference to an unsigned value. This change simply restores that behavior. Fixes: d6abfdb20223 ("x86/spinlocks/paravirt: Fix memory corruption on unlock") Signed-off-by: Tahsin Erdogan Cc: peterz@infradead.org Cc: Waiman.Long@hp.com Cc: borntraeger@de.ibm.com Cc: oleg@redhat.com Cc: raghavendra.kt@linux.vnet.ibm.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1430799331-20445-1-git-send-email-tahsin@google.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index cf87de3fc390..64b611782ef0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -169,7 +169,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock) struct __raw_tickets tmp = READ_ONCE(lock->tickets); tmp.head &= ~TICKET_SLOWPATH_FLAG; - return (tmp.tail - tmp.head) > TICKET_LOCK_INC; + return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } #define arch_spin_is_contended arch_spin_is_contended -- cgit v1.2.3 From a71dbdaa8ca2933391b08e0ae5567083e3af0892 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 4 May 2015 11:02:15 -0400 Subject: hypervisor/x86/xen: Unset X86_BUG_SYSRET_SS_ATTRS on Xen PV guests Commit 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") makes AMD processors set SS to __KERNEL_DS in __switch_to() to deal with cases when SS is NULL. This breaks Xen PV guests who do not want to load SS with __KERNEL_DS. Since the problem that the commit is trying to address would have to be fixed in the hypervisor (if it in fact exists under Xen) there is no reason to set the X86_BUG_SYSRET_SS_ATTRS flag for PV VCPUs here. This can be easily achieved by adding an x86_hyper_xen_hvm.set_cpu_features op which will clear this flag. (And since this structure is no longer HVM-specific we should do some renaming).
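Returning to the ticket-lock fix a little further up: the cast matters because head and tail are narrow counters that wrap, so "how many tickets are outstanding" must be computed in modular arithmetic. A standalone C sketch (the typedef mirrors __ticket_t; the values are made up to force a wrap):

#include <stdio.h>

typedef unsigned char ticket_t;	/* stand-in for a narrow __ticket_t */
#define TICKET_LOCK_INC 1

/* Contended iff more than one ticket separates tail from head; the
 * cast keeps the subtraction modulo 2^8, as in the patched code. */
static int is_contended(ticket_t head, ticket_t tail)
{
	return (ticket_t)(tail - head) > TICKET_LOCK_INC;
}

int main(void)
{
	ticket_t head = 254, tail = 1;	/* tail wrapped past zero */

	/* Integer-promoted subtraction goes negative and compares false: */
	printf("without cast: %d\n", (tail - head) > TICKET_LOCK_INC);	/* 0 */
	/* Modular subtraction sees the real distance of 3 tickets: */
	printf("with cast: %d\n", is_contended(head, tail));		/* 1 */
	return 0;
}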
Signed-off-by: Boris Ostrovsky Reported-by: Sander Eikelenboom Signed-off-by: David Vrabel --- arch/x86/include/asm/hypervisor.h | 2 +- arch/x86/kernel/cpu/hypervisor.c | 4 ++-- arch/x86/xen/enlighten.c | 27 ++++++++++++++++++--------- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index e42f758a0fbd..055ea9941dd5 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_xen; extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 36ce402a3fa5..d820d8eae96b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -27,8 +27,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = { -#ifdef CONFIG_XEN_PVHVM - &x86_hyper_xen_hvm, +#ifdef CONFIG_XEN + &x86_hyper_xen, #endif &x86_hyper_vmware, &x86_hyper_ms_hyperv, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94578efd3067..46957ead3060 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = { static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); @@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); } +#endif static bool xen_nopv = false; static __init int xen_parse_nopv(char *arg) @@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); -static uint32_t __init xen_hvm_platform(void) +static uint32_t __init xen_platform(void) { if (xen_nopv) return 0; - if (xen_pv_domain()) - return 0; - return xen_cpuid_base(); } @@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, +#endif .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); -#endif +EXPORT_SYMBOL(x86_hyper_xen); -- cgit v1.2.3 From de71ad2c97862eae1516aa36528cc3b317c17b2f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 4 May 2015 15:16:44 -0300 Subject: x86: Make cpu_tss available to external modules Commit 75182b1632 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()") changed current_thread_info to use this_cpu_sp0, and indirectly made it rely on init_tss which was exported with EXPORT_PER_CPU_SYMBOL_GPL. As a result some macros and inline functions such as set/get_fs, test_thread_flag and variants have been made unusable for external modules. 
Make cpu_tss exported with EXPORT_PER_CPU_SYMBOL so that these functions are accessible again, as they were previously. Signed-off-by: Marc Dionne Acked-by: Andy Lutomirski Link: http://lkml.kernel.org/r/1430763404-21221-1-git-send-email-marc.dionne@your-file-system.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8213da62b1b7..bfc99b3b6522 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif }; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); -- cgit v1.2.3 From d9ee948d82203811a545ba26b0172fce4970d1dc Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 17 Dec 2014 18:05:29 -0800 Subject: x86/asm: Use -mskip-rax-setup if supported GCC 5 added a compiler option, -mskip-rax-setup, for x86-64. It skips setting up the RAX register when SSE is disabled and there are no variable arguments passed in vector registers. (According to the x86_64 ABI, %al is used as a hidden register containing the number of vector registers used). Since the kernel doesn't pass vector registers to functions with variable arguments, this option can be used to optimize the x86-64 kernel. This GCC feature was suggested by Rasmus Villemoes . This is the corresponding kernel change using it. For kernel v3.17: text data bss dec filename 11455921 2204048 5853184 19513153 vmlinux #with -mskip-rax-setup 11480079 2204048 5853184 19537311 vmlinux For Kernel v4.0+ - custom config: text data bss dec filename 10231778 3479800 16617472 30329050 vmlinux-gcc5+-mskip-rax-setup 10268797 3547448 16621568 30437813 vmlinux Signed-off-by: H.J. Lu Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rasmus Villemoes Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5ba2d9ce82dc..40af1bac2b7d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -84,6 +84,9 @@ else # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) + # Use -mskip-rax-setup if supported. + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) -- cgit v1.2.3 From c88d47480d300eaad80c213d50c9bf6077fc49bc Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Mon, 27 Apr 2015 08:10:41 -0700 Subject: x86/fpu: Always restore_xinit_state() when use_eager_cpu() The following commit: f893959b0898 ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()") removed drop_init_fpu() usage from flush_thread(). This seems to break things for me - the Go 1.4 test suite fails all over the place with floating point comparision errors (offending commit found through bisection). The functional change was that flush_thread() after this commit only calls restore_init_xstate() when both use_eager_fpu() and !used_math() are true. 
drop_init_fpu() (now fpu_reset_state()) calls restore_init_xstate() regardless of whether current used_math() - apply the same logic here. Switch used_math() -> tsk_used_math(tsk) to consistently use the grabbed tsk instead of current, like in the rest of flush_thread(). Tested-by: Dave Hansen Signed-off-by: Bobby Powers Signed-off-by: Borislav Petkov Acked-by: Oleg Nesterov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Pekka Riikonen Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Suresh Siddha Cc: Thomas Gleixner Fixes: f893959b ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()") Link: http://lkml.kernel.org/r/1430147441-9820-1-git-send-email-bobbypowers@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bfc99b3b6522..6e338e3b1dc0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -156,11 +156,13 @@ void flush_thread(void) /* FPU state will be reallocated lazily at the first use. */ drop_fpu(tsk); free_thread_xstate(tsk); - } else if (!used_math()) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); + } else { + if (!tsk_used_math(tsk)) { + /* kthread execs. TODO: cleanup this horror. */ + if (WARN_ON(init_fpu(tsk))) + force_sig(SIGKILL, tsk); + user_fpu_begin(); + } restore_init_xstate(); } } -- cgit v1.2.3 From 5b673a48c54594108aec368014efc7334743f06a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Apr 2015 16:40:45 +0200 Subject: x86/alternatives: Document macros Add some text to the macro magic for future reference and against failing human memory. Requested-by: Ingo Molnar Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative-asm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index bdf02eeee765..e7636bac7372 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -18,6 +18,12 @@ .endm #endif +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ .macro altinstruction_entry orig alt feature orig_len alt_len pad_len .long \orig - . .long \alt - . @@ -27,6 +33,12 @@ .byte \pad_len .endm +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ .macro ALTERNATIVE oldinstr, newinstr, feature 140: \oldinstr @@ -55,6 +67,12 @@ */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @newinstr2.
+ */ .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 140: \oldinstr -- cgit v1.2.3 From 8746515d7f04c9ea94cf43e2db1fd2cfca93276d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 24 Apr 2015 10:16:40 +0100 Subject: xen: Add __GFP_DMA flag when xen_swiotlb_init gets free pages on ARM Make sure that xen_swiotlb_init allocates buffers that are DMA capable when at least one memblock is available below 4G. Otherwise we assume that all devices on the SoC can cope with >4G addresses. We do this on ARM and ARM64, where dom0 is mapped 1:1, so pfn == mfn in this case. No functional changes on x86. From: Chen Baozi Signed-off-by: Chen Baozi Signed-off-by: Stefano Stabellini Tested-by: Chen Baozi Acked-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel --- arch/arm/include/asm/xen/page.h | 1 + arch/arm/xen/mm.c | 15 +++++++++++++++ arch/x86/include/asm/xen/page.h | 5 +++++ drivers/xen/swiotlb-xen.c | 2 +- 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 2f7e6ff67d51..0b579b2f4e0e 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool xen_arch_need_swiotlb(struct device *dev, unsigned long pfn, unsigned long mfn); +unsigned long xen_get_swiotlb_free_pages(unsigned int order); #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 793551d15f1d..498325074a06 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,20 @@ #include #include +unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + struct memblock_region *reg; + gfp_t flags = __GFP_NOWARN; + + for_each_memblock(memory, reg) { + if (reg->base < (phys_addr_t)0xffffffff) { + flags |= __GFP_DMA; + break; + } + } + return __get_free_pages(flags, order); +} + enum dma_cache_op { DMA_UNMAP, DMA_MAP, diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 358dcd338915..c44a5d53e464 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } +static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + return __get_free_pages(__GFP_NOWARN, order); +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 810ad419e34c..4c549323c605 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -235,7 +235,7 @@ retry: #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order); + xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order); if (xen_io_tlb_start) break; order--; -- cgit v1.2.3 From dde74f2e4a4447ef838c57e407f7139de3df68cb Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 27 Apr 2015 15:21:51 +0200 Subject: x86/asm/entry/64: Tidy up JZ insns after TESTs After TESTs, use logically correct JZ/JNZ mnemonics instead of JE/JNE. This doesn't change code. Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1430140912-7960-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e952f6bf1d6d..8f8b22a361df 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -666,7 +666,7 @@ END(irq_entries_start) leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ testl $3, CS-RBP(%rsp) - je 1f + jz 1f SWAPGS 1: /* @@ -721,7 +721,7 @@ ret_from_intr: CFI_ADJUST_CFA_OFFSET RBP testl $3,CS(%rsp) - je retint_kernel + jz retint_kernel /* Interrupt came from user space */ GET_THREAD_INFO(%rcx) @@ -1310,7 +1310,7 @@ ENTRY(error_entry) SAVE_EXTRA_REGS 8 xorl %ebx,%ebx testl $3,CS+8(%rsp) - je error_kernelspace + jz error_kernelspace error_swapgs: SWAPGS error_sti: @@ -1361,7 +1361,7 @@ ENTRY(error_exit) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jnz retint_kernel LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi -- cgit v1.2.3 From 03335e95e27fc1f2b17b05b27342ad76986b3cf0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 27 Apr 2015 15:21:52 +0200 Subject: x86/asm/entry/64: Clean up usage of TEST insns By the nature of TEST operation, it is often possible to test a narrower part of the operand: "testl $3, mem" -> "testb $3, mem" This results in shorter insns, because TEST insn has no sign-extending byte-immediate forms unlike other ALU ops. text data bss dec hex filename 11674 0 0 11674 2d9a entry_64.o.before 11658 0 0 11658 2d8a entry_64.o Changes in object code: - f7 84 24 88 00 00 00 03 00 00 00 testl $0x3,0x88(%rsp) + f6 84 24 88 00 00 00 03 testb $0x3,0x88(%rsp) - f7 44 24 68 03 00 00 00 testl $0x3,0x68(%rsp) + f6 44 24 68 03 testb $0x3,0x68(%rsp) - f7 84 24 90 00 00 00 03 00 00 00 testl $0x3,0x90(%rsp) + f6 84 24 90 00 00 00 03 testb $0x3,0x90(%rsp) Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1430140912-7960-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8f8b22a361df..60705b032521 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -601,7 +601,7 @@ ENTRY(ret_from_fork) RESTORE_EXTRA_REGS - testl $3,CS(%rsp) # from kernel_thread? + testb $3, CS(%rsp) # from kernel_thread? /* * By the time we get here, we have no idea whether our pt_regs, @@ -665,7 +665,7 @@ END(irq_entries_start) leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ - testl $3, CS-RBP(%rsp) + testb $3, CS-RBP(%rsp) jz 1f SWAPGS 1: @@ -720,7 +720,7 @@ ret_from_intr: CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET RBP - testl $3,CS(%rsp) + testb $3, CS(%rsp) jz retint_kernel /* Interrupt came from user space */ @@ -968,7 +968,7 @@ ENTRY(\sym) .if \paranoid .if \paranoid == 1 CFI_REMEMBER_STATE - testl $3, CS(%rsp) /* If coming from userspace, switch */ + testb $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. 
*/ .endif call paranoid_entry @@ -1309,7 +1309,7 @@ ENTRY(error_entry) SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 xorl %ebx,%ebx - testl $3,CS+8(%rsp) + testb $3, CS+8(%rsp) jz error_kernelspace error_swapgs: SWAPGS @@ -1606,7 +1606,6 @@ end_repeat_nmi: je 1f movq %r12, %cr2 1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: -- cgit v1.2.3 From 2a4e90b18c256d52a7f3f77d58114f6d4e4a7f9f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 May 2015 12:26:02 +0200 Subject: x86: Force inlining of atomic ops With both gcc 4.7.2 and 4.9.2, sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined: $ nm --size-sort vmlinux | grep -iF ' t ' | uniq -c | grep -v '^ *1 ' | sort -rn 473 000000000000000b t spin_unlock_irqrestore 449 000000000000005f t rcu_read_unlock 355 0000000000000009 t atomic_inc <== THIS 353 000000000000006e t rcu_read_lock 350 0000000000000075 t rcu_read_lock_sched_held 291 000000000000000b t spin_unlock 266 0000000000000019 t arch_local_irq_restore 215 000000000000000b t spin_lock 180 0000000000000011 t kzalloc 165 0000000000000012 t list_add_tail 161 0000000000000019 t arch_local_save_flags 153 0000000000000016 t test_and_set_bit 134 000000000000000b t spin_unlock_irq 134 0000000000000009 t atomic_dec <== THIS 130 000000000000000b t spin_unlock_bh 122 0000000000000010 t brelse 120 0000000000000016 t test_and_clear_bit 120 000000000000000b t spin_lock_irq 119 000000000000001e t get_dma_ops 117 0000000000000053 t cpumask_next 116 0000000000000036 t kref_get 114 000000000000001a t schedule_work 106 000000000000000b t spin_lock_bh 103 0000000000000019 t arch_local_irq_disable ... Note sizes of marked functions. They are merely 9 bytes long! Selecting function with 'atomic' in their names: 355 0000000000000009 t atomic_inc 134 0000000000000009 t atomic_dec 98 0000000000000014 t atomic_dec_and_test 31 000000000000000e t atomic_add_return 27 000000000000000a t atomic64_inc 26 000000000000002f t kmap_atomic 24 0000000000000009 t atomic_add 12 0000000000000009 t atomic_sub 10 0000000000000021 t __atomic_add_unless 10 000000000000000a t atomic64_add 5 000000000000001f t __atomic_add_unless.constprop.7 5 000000000000000a t atomic64_dec 4 000000000000001f t __atomic_add_unless.constprop.18 4 000000000000001f t __atomic_add_unless.constprop.12 4 000000000000001f t __atomic_add_unless.constprop.10 3 000000000000001f t __atomic_add_unless.constprop.13 3 0000000000000011 t atomic64_add_return 2 000000000000001f t __atomic_add_unless.constprop.9 2 000000000000001f t __atomic_add_unless.constprop.8 2 000000000000001f t __atomic_add_unless.constprop.6 2 000000000000001f t __atomic_add_unless.constprop.5 2 000000000000001f t __atomic_add_unless.constprop.3 2 000000000000001f t __atomic_add_unless.constprop.22 2 000000000000001f t __atomic_add_unless.constprop.14 2 000000000000001f t __atomic_add_unless.constprop.11 2 000000000000001e t atomic_dec_if_positive 2 0000000000000014 t atomic_inc_and_test 2 0000000000000011 t atomic_add_return.constprop.4 2 0000000000000011 t atomic_add_return.constprop.17 2 0000000000000011 t atomic_add_return.constprop.16 2 000000000000000d t atomic_inc.constprop.4 2 000000000000000c t atomic_cmpxchg This patch fixes this for x86 atomic ops via s/inline/__always_inline/. 
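As an aside, the effect is easy to reproduce outside the kernel. Below is a minimal user-space sketch, not the kernel's atomic.h; my_always_inline and my_atomic_inc are hypothetical names standing in for __always_inline and atomic_inc():

#include <stdio.h>

/* GCC's always_inline attribute is what the kernel's __always_inline expands to */
#define my_always_inline inline __attribute__((always_inline))

static my_always_inline void my_atomic_inc(volatile int *counter)
{
	/* same insn the kernel's atomic_inc() emits on SMP x86 */
	asm volatile("lock incl %0" : "+m" (*counter));
}

int main(void)
{
	int v = 0;

	my_atomic_inc(&v);
	printf("v = %d\n", v);	/* prints v = 1 */
	return 0;
}

With plain "inline", gcc is free to (and, as the nm listing above shows, sometimes does) emit an out-of-line copy per object file; the attribute removes that freedom.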
This decreases allyesconfig kernel by about 25k: text data bss dec hex filename 82399481 22255416 20627456 125282353 777a831 vmlinux.before 82375570 22255544 20627456 125258570 7774b4a vmlinux Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1431080762-17797-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 75a9ee8529f3..e9168955c42f 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,7 @@ * * Atomically reads the value of @v. */ -static inline int atomic_read(const atomic_t *v) +static __always_inline int atomic_read(const atomic_t *v) { return ACCESS_ONCE((v)->counter); } @@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v) * * Atomically sets the value of @v to @i. */ -static inline void atomic_set(atomic_t *v, int i) +static __always_inline void atomic_set(atomic_t *v, int i) { v->counter = i; } @@ -126,7 +126,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic_inc_and_test(atomic_t *v) +static __always_inline int atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline int atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } @@ -164,7 +164,7 @@ static __always_inline int atomic_add_return(int i, atomic_t *v) * * Atomically subtracts @i from @v and returns @v - @i */ -static inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int atomic_sub_return(int i, atomic_t *v) { return atomic_add_return(-i, v); } @@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } @@ -213,7 +213,7 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) * Atomically adds 1 to @v * Returns the new value of @u */ -static inline short int atomic_inc_short(short int *v) +static __always_inline short int atomic_inc_short(short int *v) { asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); return *v; -- cgit v1.2.3 From 63332a8455d8310b77d38779c6c21a660a8d9feb Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:33 +0200 Subject: x86/entry: Stop using PER_CPU_VAR(kernel_stack) PER_CPU_VAR(kernel_stack) is redundant: - On the 64-bit build, we can use PER_CPU_VAR(cpu_tss + TSS_sp0). - On the 32-bit build, we can use PER_CPU_VAR(cpu_current_top_of_stack). PER_CPU_VAR(kernel_stack) will be deleted by a separate change. 
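The redundancy is easy to see in C terms. A sketch, assuming THREAD_SIZE-aligned task stacks as on x86 (ti_from_top is a hypothetical helper name; the GET_THREAD_INFO asm macro performs the same subtraction):

/*
 * thread_info lives at the bottom of the task stack, so any value that
 * points at the top of the current stack determines it completely -
 * which is why a second per-cpu copy such as kernel_stack adds nothing.
 */
static inline struct thread_info *ti_from_top(unsigned long top_of_stack)
{
	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
}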
Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/thread_info.h | 8 +++++++- arch/x86/kernel/entry_64.S | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++-- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 2ab0f7182df3..1b1330c07971 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -346,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b4bdec3e9523..d656a363e1eb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -198,9 +198,15 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ /* Load thread_info address into "reg" */ +#ifdef CONFIG_X86_32 #define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; +#else +#define GET_THREAD_INFO(reg) \ + _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \ + _ASM_SUB $(THREAD_SIZE),reg ; +#endif /* * ASM operand which evaluates to a 'thread_info' address of diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7423e3e2f5c5..c13b86b40176 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index a2cabb8bd6bf..5aa7ec607b9e 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ ENTRY(xen_sysret64) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -72,7 +73,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) -- cgit v1.2.3 From fed7c3f0f750f225317828d691e9eb76eec887b3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:34 +0200 Subject: x86/entry: Remove unused 'kernel_stack' per-cpu variable Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/cpu/common.c | 4 ---- arch/x86/kernel/process_32.c | 5 +---- arch/x86/kernel/process_64.c | 3 --- arch/x86/kernel/smpboot.c | 2 -- 5 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d656a363e1eb..472288962c99 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -177,8 +177,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -DECLARE_PER_CPU(unsigned long, kernel_stack); - static inline struct thread_info *current_thread_info(void) { return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..6bec0b55863e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..a99900cedc22 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0, kernel_stack, and current_top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..82134506faa8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE); - /* * Now maybe reload the debug registers and handle I/O bitmaps */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..023cccf5a4ae 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -792,8 +792,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; } /* -- cgit v1.2.3 From 3a23208e69679597e767cf3547b1a30dd845d9b5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:35 +0200 Subject: x86/entry: Define 'cpu_current_top_of_stack' for 64-bit code 32-bit code has PER_CPU_VAR(cpu_current_top_of_stack). 64-bit code uses somewhat more obscure: PER_CPU_VAR(cpu_tss + TSS_sp0). 
Define the 'cpu_current_top_of_stack' macro on CONFIG_X86_64 as well so that the PER_CPU_VAR(cpu_current_top_of_stack) expression can be used in both 32-bit and 64-bit code. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-3-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 4 ++-- arch/x86/include/asm/thread_info.h | 10 ++++------ arch/x86/kernel/entry_64.S | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++-- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 1b1330c07971..63450a596800 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -113,7 +113,7 @@ ENTRY(ia32_sysenter_target) * it is too small to ever cause noticeable irq latency. */ SWAPGS_UNSAFE_STACK - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -346,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 472288962c99..225ee545e1a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -195,16 +195,14 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +#endif + /* Load thread_info address into "reg" */ -#ifdef CONFIG_X86_32 #define GET_THREAD_INFO(reg) \ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; -#else -#define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; -#endif /* * ASM operand which evaluates to a 'thread_info' address of diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c13b86b40176..09c3f9e0e07e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5aa7ec607b9e..04529e620559 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -54,7 +55,7 @@ ENTRY(xen_sysret64) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -73,7 +74,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) 
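On the C side the same unification already exists as the current_top_of_stack() helper; a sketch along those lines (the exact kernel body may differ slightly):

static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
	/* sp0 sits inside the per-cpu TSS; TSS_sp0 is its asm-offsets constant */
	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
	return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}

Either way, the asm macro and the C helper now name the same per-cpu slot.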
-- cgit v1.2.3 From c5bde906d2916d214d78cd8b67d665bf09867033 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:50 -0400 Subject: x86/irq: Merge irq_regs & irq_stat Move irq_regs and irq_stat definitions to irq.c. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 6 ++++++ arch/x86/kernel/irq_32.c | 6 ------ arch/x86/kernel/irq_64.c | 6 ------ 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e5952c225532..fe2ed8bb507b 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -22,6 +22,12 @@ #define CREATE_TRACE_POINTS #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + atomic_t irq_err_count; /* Function pointer for generic interrupt vector handling */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f9fd86a7fcc7..cd74f5978ab9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -21,12 +21,6 @@ #include -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - #ifdef CONFIG_DEBUG_STACKOVERFLOW int sysctl_panic_on_stackoverflow __read_mostly; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 394e643d7830..bc4604e500a3 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -20,12 +20,6 @@ #include #include -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - int sysctl_panic_on_stackoverflow; /* -- cgit v1.2.3 From c6e692f95dacddff5f3607717fb2246c60bbb714 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:51 -0400 Subject: x86/asm/entry/irq: Remove unused invalidate_interrupt prototypes The invalidate_interrupt* functions no longer exist. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..014c6382ffce 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -36,40 +36,6 @@ extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void invalidate_interrupt(void); -extern asmlinkage void invalidate_interrupt0(void); -extern asmlinkage void invalidate_interrupt1(void); -extern asmlinkage void invalidate_interrupt2(void); -extern asmlinkage void invalidate_interrupt3(void); -extern asmlinkage void invalidate_interrupt4(void); -extern asmlinkage void invalidate_interrupt5(void); -extern asmlinkage void invalidate_interrupt6(void); -extern asmlinkage void invalidate_interrupt7(void); -extern asmlinkage void invalidate_interrupt8(void); -extern asmlinkage void invalidate_interrupt9(void); -extern asmlinkage void invalidate_interrupt10(void); -extern asmlinkage void invalidate_interrupt11(void); -extern asmlinkage void invalidate_interrupt12(void); -extern asmlinkage void invalidate_interrupt13(void); -extern asmlinkage void invalidate_interrupt14(void); -extern asmlinkage void invalidate_interrupt15(void); -extern asmlinkage void invalidate_interrupt16(void); -extern asmlinkage void invalidate_interrupt17(void); -extern asmlinkage void invalidate_interrupt18(void); -extern asmlinkage void invalidate_interrupt19(void); -extern asmlinkage void invalidate_interrupt20(void); -extern asmlinkage void invalidate_interrupt21(void); -extern asmlinkage void invalidate_interrupt22(void); -extern asmlinkage void invalidate_interrupt23(void); -extern asmlinkage void invalidate_interrupt24(void); -extern asmlinkage void invalidate_interrupt25(void); -extern asmlinkage void invalidate_interrupt26(void); -extern asmlinkage void invalidate_interrupt27(void); -extern asmlinkage void invalidate_interrupt28(void); -extern asmlinkage void invalidate_interrupt29(void); -extern asmlinkage void invalidate_interrupt30(void); -extern asmlinkage void invalidate_interrupt31(void); - extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); @@ -178,7 +144,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); extern __visible void smp_reschedule_interrupt(struct pt_regs *); extern __visible void smp_call_function_interrupt(struct pt_regs *); extern __visible void smp_call_function_single_interrupt(struct pt_regs *); -extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif extern char irq_entries_start[]; -- cgit v1.2.3 From 51bb92843edcba5a58138cad25ced97923048add Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:52 -0400 Subject: x86/asm/entry: Remove SYSCALL_VECTOR Use IA32_SYSCALL_VECTOR for both compat and native. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 3 --- arch/x86/kernel/traps.c | 4 ++-- arch/x86/lguest/boot.c | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..07f27926d473 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -47,9 +47,6 @@ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 -#ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 -#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..5e0791f9d3dc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -997,8 +997,8 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); + set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..cab9aaa7802c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -90,7 +90,7 @@ struct lguest_data lguest_data = { .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ - .syscall_vec = SYSCALL_VECTOR, + .syscall_vec = IA32_SYSCALL_VECTOR, }; /*G:037 @@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void) for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); - if (i != SYSCALL_VECTOR) + if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); } -- cgit v1.2.3 From 8b455e6577f325289cf2d1b20f493b2fe5c6c316 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:53 -0400 Subject: x86/asm/entry/irq: Clean up IRQn_VECTOR macros Since the ISA irqs are in a single block, use ISA_IRQ_VECTOR(irq) instead of individual macros. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-5-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 18 +----------------- arch/x86/kernel/apic/io_apic.c | 4 ++-- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/i8259.c | 8 ++++---- arch/x86/kernel/irqinit.c | 4 ++-- 5 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 07f27926d473..117db96ad5fb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -52,23 +52,7 @@ * Vectors 0x30-0x3f are used for ISA interrupts. 
* round up to the next 16-vector boundary */ -#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) - -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..e01e4117188a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -258,11 +258,11 @@ int __init arch_early_ioapic_init(void) /* * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. + * ISA_IRQ_VECTOR(irq) for all cpu's. */ for (i = 0; i < nr_legacy_irqs(); i++) { cfg = alloc_irq_and_cfg_at(i, node); - cfg->vector = IRQ0_VECTOR + i; + cfg->vector = ISA_IRQ_VECTOR(i); cpumask_setall(cfg->domain); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..82d44c314a3f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -314,7 +314,7 @@ void setup_vector_irq(int cpu) * legacy vector to irq mapping: */ for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq; __setup_vector_irq(cpu); } diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index e7cc5370cd2f..16cb827a5b27 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi) */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */ + outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); @@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi) outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */ + outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..dc1e08d23552 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) int i; /* - * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15. * If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If * these IRQ's are handled by more mordern controllers like IO-APIC, @@ -94,7 +94,7 @@ void __init init_IRQ(void) * irq's migrate etc. 
*/ for (i = 0; i < nr_legacy_irqs(); i++) - per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i; x86_init.irqs.intr_init(); } -- cgit v1.2.3 From f21262b8e092a770e39fbd405cc18a0247c3af68 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 May 2015 10:15:46 +0200 Subject: x86/alternatives: Switch AMD F15h and later to the P6 NOPs Software optimization guides for both F15h and F16h cite those NOPs as the optimal ones. A microbenchmark confirms that actually even older families are better with the single-insn NOPs so switch to them for the alternatives. Cycles count below includes the loop overhead of the measurement but that overhead is the same with all runs. F10h, revE: ----------- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 288.212282 cycles 66 90 288.220840 cycles 66 66 90 288.219447 cycles 66 66 66 90 288.223204 cycles 66 66 90 66 90 571.393424 cycles 66 66 90 66 66 90 571.374919 cycles 66 66 66 90 66 66 90 572.249281 cycles 66 66 66 90 66 66 66 90 571.388651 cycles P6: 90 288.214193 cycles 66 90 288.225550 cycles 0f 1f 00 288.224441 cycles 0f 1f 40 00 288.225030 cycles 0f 1f 44 00 00 288.233558 cycles 66 0f 1f 44 00 00 324.792342 cycles 0f 1f 80 00 00 00 00 325.657462 cycles 0f 1f 84 00 00 00 00 00 430.246643 cycles F14h: ---- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 510.404890 cycles 66 90 510.432117 cycles 66 66 90 510.561858 cycles 66 66 66 90 510.541865 cycles 66 66 90 66 90 1014.192782 cycles 66 66 90 66 66 90 1014.226546 cycles 66 66 66 90 66 66 90 1014.334299 cycles 66 66 66 90 66 66 66 90 1014.381205 cycles P6: 90 510.436710 cycles 66 90 510.448229 cycles 0f 1f 00 510.545100 cycles 0f 1f 40 00 510.502792 cycles 0f 1f 44 00 00 510.589517 cycles 66 0f 1f 44 00 00 510.611462 cycles 0f 1f 80 00 00 00 00 511.166794 cycles 0f 1f 84 00 00 00 00 00 511.651641 cycles F15h: ----- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 243.128396 cycles 66 90 243.129883 cycles 66 66 90 243.131631 cycles 66 66 66 90 242.499324 cycles 66 66 90 66 90 481.829083 cycles 66 66 90 66 66 90 481.884413 cycles 66 66 66 90 66 66 90 481.851446 cycles 66 66 66 90 66 66 66 90 481.409220 cycles P6: 90 243.127026 cycles 66 90 243.130711 cycles 0f 1f 00 243.122747 cycles 0f 1f 40 00 242.497617 cycles 0f 1f 44 00 00 245.354461 cycles 66 0f 1f 44 00 00 361.930417 cycles 0f 1f 80 00 00 00 00 362.844944 cycles 0f 1f 84 00 00 00 00 00 480.514948 cycles F16h: ----- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 507.793298 cycles 66 90 507.789636 cycles 66 66 90 507.826490 cycles 66 66 66 90 507.859075 cycles 66 66 90 66 90 1008.663129 cycles 66 66 90 66 66 90 1008.696259 cycles 66 66 66 90 66 66 90 1008.692517 cycles 66 66 66 90 66 66 66 90 1008.755399 cycles P6: 90 507.795232 cycles 66 90 507.794761 cycles 0f 1f 00 507.834901 cycles 0f 1f 40 00 507.822629 cycles 0f 1f 44 00 00 507.838493 cycles 66 0f 1f 44 00 00 507.908597 cycles 0f 1f 80 00 00 00 00 507.946417 cycles 0f 1f 84 00 00 00 00 00 507.954960 cycles Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431332153-18566-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..b0932c4341b3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -227,6 +227,15 @@ void __init arch_init_ideal_nops(void) #endif } break; + + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 > 0xf) { + ideal_nops = p6_nops; + return; + } + + /* fall through */ + default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; -- cgit v1.2.3
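For reference, measurements of the sort tabulated above can be reproduced with a small user-space harness. A hedged sketch, not the actual tool used for the tables: it fills executable memory with one-byte 0x90 NOPs and times calls with an unserialized RDTSC, which is crude but sufficient for the relative comparison:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static inline uint64_t rdtsc(void)
{
	uint32_t lo, hi;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	unsigned char *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	void (*fn)(void);
	uint64_t t0;
	int i;

	if (buf == MAP_FAILED)
		return 1;

	memset(buf, 0x90, 1000);	/* 1000 x one-byte NOP */
	buf[1000] = 0xc3;		/* near RET */
	fn = (void (*)(void))buf;

	t0 = rdtsc();
	for (i = 0; i < 1000000; i++)
		fn();
	printf("%f cycles\n", (double)(rdtsc() - t0) / 1000000.0);
	return 0;
}

Filling the buffer with repeated multi-byte sequences such as 0f 1f 00 instead of the memset() (e.g. via memcpy in a loop) reproduces the P6 rows; the loop overhead is constant across patterns, as noted in the commit message above.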