From 89f9f6796d41e10e224b0cb0027ddd78cb881f65 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 27 Feb 2015 11:25:59 -0800 Subject: hv: vmbus_post_msg: retry the hypercall on some transient errors I got HV_STATUS_INVALID_CONNECTION_ID on Hyper-V 2008 R2 while repeatedly running "rmmod hv_netvsc; modprobe hv_netvsc; rmmod hv_utils; modprobe hv_utils" in a Linux guest. It looks like the host has some kind of throttling mechanism that kicks in when certain kinds of hypercalls are sent too frequently. Without the patch, the driver can occasionally fail to load. Also let's retry HV_STATUS_INSUFFICIENT_MEMORY, though we didn't get it before. Removed 'case -ENOMEM', since the hypervisor doesn't return this. CC: "K. Y. Srinivasan" Reviewed-by: Jason Wang Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 90c458e66e13..ce6068dbcfbc 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -225,6 +225,8 @@ #define HV_STATUS_INVALID_HYPERCALL_CODE 2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 #define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 typedef struct _HV_REFERENCE_TSC_PAGE { -- cgit v1.2.3 From 1bd187de536494a27925902b9653e9ef0ace4774 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 23 Feb 2015 16:24:44 +0200 Subject: x86, intel-mid: remove Intel MID specific serial support Since we have a native 8250 driver carrying the Intel MID serial devices, the platform-specific support is not needed anymore. This patch removes it for Intel MID. Note that the console device name is changed from ttyMFDx to ttySx. Signed-off-by: Andy Shevchenko Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig.debug | 4 - arch/x86/include/asm/intel-mid.h | 3 - arch/x86/kernel/early_printk.c | 6 - arch/x86/platform/intel-mid/Makefile | 1 - .../platform/intel-mid/early_printk_intel_mid.c | 112 -- drivers/tty/serial/Kconfig | 10 - drivers/tty/serial/Makefile | 1 - drivers/tty/serial/mfd.c | 1505 -------------------- include/linux/serial_mfd.h | 47 - include/uapi/linux/serial_reg.h | 19 - 10 files changed, 1708 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/early_printk_intel_mid.c delete mode 100644 drivers/tty/serial/mfd.c delete mode 100644 include/linux/serial_mfd.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 20028da8ae18..72484a645f05 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -43,10 +43,6 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally say N here, unless you want to debug such a crash.
-config EARLY_PRINTK_INTEL_MID - bool "Early printk for Intel MID platform support" - depends on EARLY_PRINTK && X86_INTEL_MID - config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" depends on EARLY_PRINTK && PCI diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 705d35708a50..7c5af123bdbd 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options; #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -extern struct console early_hsu_console; -extern void hsu_early_console_init(const char *); - extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index a62536a1be88..f85e3fb50f28 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_INTEL_MID - if (!strncmp(buf, "hsu", 3)) { - hsu_early_console_init(buf + 3); - early_console_register(&early_hsu_console, keep); - } -#endif #ifdef CONFIG_EARLY_PRINTK_EFI if (!strncmp(buf, "efi", 3)) early_console_register(&early_efi_console, keep); diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0a8ee703b9fa..0ce1b1913673 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o -obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c deleted file mode 100644 index 4e720829ab90..000000000000 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * early_printk_intel_mid.c - early consoles for Intel MID platforms - * - * Copyright (c) 2008-2010, Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* - * This file implements early console named hsu. - * hsu is based on a High Speed UART device which only exists in the Medfield - * platform - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Following is the early console based on Medfield HSU (High - * Speed UART) device. - */ -#define HSU_PORT_BASE 0xffa28080 - -static void __iomem *phsu; - -void hsu_early_console_init(const char *s) -{ - unsigned long paddr, port = 0; - u8 lcr; - - /* - * Select the early HSU console port if specified by user in the - * kernel command line. 
- */ - if (*s && !kstrtoul(s, 10, &port)) - port = clamp_val(port, 0, 2); - - paddr = HSU_PORT_BASE + port * 0x80; - phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); - - /* Disable FIFO */ - writeb(0x0, phsu + UART_FCR); - - /* Set to default 115200 bps, 8n1 */ - lcr = readb(phsu + UART_LCR); - writeb((0x80 | lcr), phsu + UART_LCR); - writeb(0x18, phsu + UART_DLL); - writeb(lcr, phsu + UART_LCR); - writel(0x3600, phsu + UART_MUL*4); - - writeb(0x8, phsu + UART_MCR); - writeb(0x7, phsu + UART_FCR); - writeb(0x3, phsu + UART_LCR); - - /* Clear IRQ status */ - readb(phsu + UART_LSR); - readb(phsu + UART_RX); - readb(phsu + UART_IIR); - readb(phsu + UART_MSR); - - /* Enable FIFO */ - writeb(0x7, phsu + UART_FCR); -} - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -static void early_hsu_putc(char ch) -{ - unsigned int timeout = 10000; /* 10ms */ - u8 status; - - while (--timeout) { - status = readb(phsu + UART_LSR); - if (status & BOTH_EMPTY) - break; - udelay(1); - } - - /* Only write the char when there was no timeout */ - if (timeout) - writeb(ch, phsu + UART_TX); -} - -static void early_hsu_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_hsu_putc('\r'); - early_hsu_putc(*str); - str++; - } -} - -struct console early_hsu_console = { - .name = "earlyhsu", - .write = early_hsu_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 60c368a5dbf1..c9dbc626038d 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -483,16 +483,6 @@ config SERIAL_SA1100_CONSOLE your boot loader (lilo or loadlin) about how to pass options to the kernel at boot time.) -config SERIAL_MFD_HSU - tristate "Medfield High Speed UART support" - depends on PCI - select SERIAL_CORE - -config SERIAL_MFD_HSU_CONSOLE - bool "Medfield HSU serial console support" - depends on SERIAL_MFD_HSU=y - select SERIAL_CORE_CONSOLE - config SERIAL_BFIN tristate "Blackfin serial port support" depends on BLACKFIN diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 599be4b05a26..f42b4f9845df 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -78,7 +78,6 @@ obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o -obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o obj-$(CONFIG_SERIAL_PCH_UART) += pch_uart.o obj-$(CONFIG_SERIAL_MSM_SMD) += msm_smd_tty.o diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c deleted file mode 100644 index 8fe4501d7565..000000000000 --- a/drivers/tty/serial/mfd.c +++ /dev/null @@ -1,1505 +0,0 @@ -/* - * mfd.c: driver for High Speed UART device of Intel Medfield platform - * - * Refer pxa.c, 8250.c and some other drivers in drivers/serial/ - * - * (C) Copyright 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* Notes: - * 1. DMA channel allocation: 0/1 channel are assigned to port 0, - * 2/3 chan to port 1, 4/5 chan to port 3. Even number chans - * are used for RX, odd chans for TX - * - * 2. 
The RI/DSR/DCD/DTR are not pinned out, DCD & DSR are always - * asserted, only when the HW is reset the DDCD and DDSR will - * be triggered - */ - -#if defined(CONFIG_SERIAL_MFD_HSU_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) -#define SUPPORT_SYSRQ -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HSU_DMA_BUF_SIZE 2048 - -#define chan_readl(chan, offset) readl(chan->reg + offset) -#define chan_writel(chan, offset, val) writel(val, chan->reg + offset) - -#define mfd_readl(obj, offset) readl(obj->reg + offset) -#define mfd_writel(obj, offset, val) writel(val, obj->reg + offset) - -static int hsu_dma_enable; -module_param(hsu_dma_enable, int, 0); -MODULE_PARM_DESC(hsu_dma_enable, - "It is a bitmap to set working mode, if bit[x] is 1, then port[x] will work in DMA mode, otherwise in PIO mode."); - -struct hsu_dma_buffer { - u8 *buf; - dma_addr_t dma_addr; - u32 dma_size; - u32 ofs; -}; - -struct hsu_dma_chan { - u32 id; - enum dma_data_direction dirt; - struct uart_hsu_port *uport; - void __iomem *reg; -}; - -struct uart_hsu_port { - struct uart_port port; - unsigned char ier; - unsigned char lcr; - unsigned char mcr; - unsigned int lsr_break_flag; - char name[12]; - int index; - struct device *dev; - - struct hsu_dma_chan *txc; - struct hsu_dma_chan *rxc; - struct hsu_dma_buffer txbuf; - struct hsu_dma_buffer rxbuf; - int use_dma; /* flag for DMA/PIO */ - int running; - int dma_tx_on; -}; - -/* Top level data structure of HSU */ -struct hsu_port { - void __iomem *reg; - unsigned long paddr; - unsigned long iolen; - u32 irq; - - struct uart_hsu_port port[3]; - struct hsu_dma_chan chans[10]; - - struct dentry *debugfs; -}; - -static inline unsigned int serial_in(struct uart_hsu_port *up, int offset) -{ - unsigned int val; - - if (offset > UART_MSR) { - offset <<= 2; - val = readl(up->port.membase + offset); - } else - val = (unsigned int)readb(up->port.membase + offset); - - return val; -} - -static inline void serial_out(struct uart_hsu_port *up, int offset, int value) -{ - if (offset > UART_MSR) { - offset <<= 2; - writel(value, up->port.membase + offset); - } else { - unsigned char val = value & 0xff; - writeb(val, up->port.membase + offset); - } -} - -#ifdef CONFIG_DEBUG_FS - -#define HSU_REGS_BUFSIZE 1024 - - -static ssize_t port_show_regs(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct uart_hsu_port *up = file->private_data; - char *buf; - u32 len = 0; - ssize_t ret; - - buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL); - if (!buf) - return 0; - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MFD HSU port[%d] regs:\n", up->index); - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "=================================\n"); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "IER: \t\t0x%08x\n", serial_in(up, UART_IER)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "IIR: \t\t0x%08x\n", serial_in(up, UART_IIR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "LCR: \t\t0x%08x\n", serial_in(up, UART_LCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MCR: \t\t0x%08x\n", serial_in(up, UART_MCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "LSR: \t\t0x%08x\n", serial_in(up, UART_LSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MSR: \t\t0x%08x\n", serial_in(up, UART_MSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "FOR: 
\t\t0x%08x\n", serial_in(up, UART_FOR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "PS: \t\t0x%08x\n", serial_in(up, UART_PS)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MUL: \t\t0x%08x\n", serial_in(up, UART_MUL)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "DIV: \t\t0x%08x\n", serial_in(up, UART_DIV)); - - if (len > HSU_REGS_BUFSIZE) - len = HSU_REGS_BUFSIZE; - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); - kfree(buf); - return ret; -} - -static ssize_t dma_show_regs(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hsu_dma_chan *chan = file->private_data; - char *buf; - u32 len = 0; - ssize_t ret; - - buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL); - if (!buf) - return 0; - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MFD HSU DMA channel [%d] regs:\n", chan->id); - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "=================================\n"); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "CR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_CR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "DCR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_DCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "BSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_BSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MOTSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_MOTSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3TSR)); - - if (len > HSU_REGS_BUFSIZE) - len = HSU_REGS_BUFSIZE; - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); - kfree(buf); - return ret; -} - -static const struct file_operations port_regs_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = port_show_regs, - .llseek = default_llseek, -}; - -static const struct file_operations dma_regs_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = dma_show_regs, - .llseek = default_llseek, -}; - -static int hsu_debugfs_init(struct hsu_port *hsu) -{ - int i; - char name[32]; - - hsu->debugfs = debugfs_create_dir("hsu", NULL); - if (!hsu->debugfs) - return -ENOMEM; - - for (i = 0; i < 3; i++) { - snprintf(name, sizeof(name), "port_%d_regs", i); - debugfs_create_file(name, S_IFREG | S_IRUGO, - hsu->debugfs, (void *)(&hsu->port[i]), &port_regs_ops); - } - - for (i = 0; i < 6; i++) { - snprintf(name, sizeof(name), "dma_chan_%d_regs", i); - debugfs_create_file(name, S_IFREG | S_IRUGO, - hsu->debugfs, (void *)&hsu->chans[i], &dma_regs_ops); - } - - return 0; -} - -static void hsu_debugfs_remove(struct hsu_port *hsu) -{ - if (hsu->debugfs) - debugfs_remove_recursive(hsu->debugfs); -} - -#else -static inline int hsu_debugfs_init(struct hsu_port *hsu) -{ - return 0; -} 
- -static inline void hsu_debugfs_remove(struct hsu_port *hsu) -{ -} -#endif /* CONFIG_DEBUG_FS */ - -static void serial_hsu_enable_ms(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - up->ier |= UART_IER_MSI; - serial_out(up, UART_IER, up->ier); -} - -static void hsu_dma_tx(struct uart_hsu_port *up) -{ - struct circ_buf *xmit = &up->port.state->xmit; - struct hsu_dma_buffer *dbuf = &up->txbuf; - int count; - - /* test_and_set_bit may be better, but anyway it's in lock protected mode */ - if (up->dma_tx_on) - return; - - /* Update the circ buf info */ - xmit->tail += dbuf->ofs; - xmit->tail &= UART_XMIT_SIZE - 1; - - up->port.icount.tx += dbuf->ofs; - dbuf->ofs = 0; - - /* Disable the channel */ - chan_writel(up->txc, HSU_CH_CR, 0x0); - - if (!uart_circ_empty(xmit) && !uart_tx_stopped(&up->port)) { - dma_sync_single_for_device(up->port.dev, - dbuf->dma_addr, - dbuf->dma_size, - DMA_TO_DEVICE); - - count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - dbuf->ofs = count; - - /* Reprogram the channel */ - chan_writel(up->txc, HSU_CH_D0SAR, dbuf->dma_addr + xmit->tail); - chan_writel(up->txc, HSU_CH_D0TSR, count); - - /* Reenable the channel */ - chan_writel(up->txc, HSU_CH_DCR, 0x1 - | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24)); - up->dma_tx_on = 1; - chan_writel(up->txc, HSU_CH_CR, 0x1); - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); -} - -/* The buffer is already cache coherent */ -static void hsu_dma_start_rx_chan(struct hsu_dma_chan *rxc, - struct hsu_dma_buffer *dbuf) -{ - dbuf->ofs = 0; - - chan_writel(rxc, HSU_CH_BSR, 32); - chan_writel(rxc, HSU_CH_MOTSR, 4); - - chan_writel(rxc, HSU_CH_D0SAR, dbuf->dma_addr); - chan_writel(rxc, HSU_CH_D0TSR, dbuf->dma_size); - chan_writel(rxc, HSU_CH_DCR, 0x1 | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24) /* timeout bit, see HSU Errata 1 */ - ); - chan_writel(rxc, HSU_CH_CR, 0x3); -} - -/* Protected by spin_lock_irqsave(port->lock) */ -static void serial_hsu_start_tx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - if (up->use_dma) { - hsu_dma_tx(up); - } else if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -static void serial_hsu_stop_tx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - struct hsu_dma_chan *txc = up->txc; - - if (up->use_dma) - chan_writel(txc, HSU_CH_CR, 0x0); - else if (up->ier & UART_IER_THRI) { - up->ier &= ~UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -/* This is always called in spinlock protected mode, so - * modify timeout timer is safe here */ -static void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts, - unsigned long *flags) -{ - struct hsu_dma_buffer *dbuf = &up->rxbuf; - struct hsu_dma_chan *chan = up->rxc; - struct uart_port *port = &up->port; - struct tty_port *tport = &port->state->port; - int count; - - /* - * First need to know how many is already transferred, - * then check if its a timeout DMA irq, and return - * the trail bytes out, push them up and reenable the - * channel - */ - - /* Timeout IRQ, need wait some time, see Errata 2 */ - if (int_sts & 0xf00) - udelay(2); - - /* Stop the channel */ - chan_writel(chan, HSU_CH_CR, 0x0); - - count = chan_readl(chan, HSU_CH_D0SAR) - dbuf->dma_addr; - if (!count) { - /* Restart the channel before we leave */ - chan_writel(chan, HSU_CH_CR, 0x3); - return; - } - - 
dma_sync_single_for_cpu(port->dev, dbuf->dma_addr, - dbuf->dma_size, DMA_FROM_DEVICE); - - /* - * Head will only wrap around when we recycle - * the DMA buffer, and when that happens, we - * explicitly set tail to 0. So head will - * always be greater than tail. - */ - tty_insert_flip_string(tport, dbuf->buf, count); - port->icount.rx += count; - - dma_sync_single_for_device(up->port.dev, dbuf->dma_addr, - dbuf->dma_size, DMA_FROM_DEVICE); - - /* Reprogram the channel */ - chan_writel(chan, HSU_CH_D0SAR, dbuf->dma_addr); - chan_writel(chan, HSU_CH_D0TSR, dbuf->dma_size); - chan_writel(chan, HSU_CH_DCR, 0x1 - | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24) /* timeout bit, see HSU Errata 1 */ - ); - spin_unlock_irqrestore(&up->port.lock, *flags); - tty_flip_buffer_push(tport); - spin_lock_irqsave(&up->port.lock, *flags); - - chan_writel(chan, HSU_CH_CR, 0x3); - -} - -static void serial_hsu_stop_rx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - struct hsu_dma_chan *chan = up->rxc; - - if (up->use_dma) - chan_writel(chan, HSU_CH_CR, 0x2); - else { - up->ier &= ~UART_IER_RLSI; - up->port.read_status_mask &= ~UART_LSR_DR; - serial_out(up, UART_IER, up->ier); - } -} - -static inline void receive_chars(struct uart_hsu_port *up, int *status, - unsigned long *flags) -{ - unsigned int ch, flag; - unsigned int max_count = 256; - - do { - ch = serial_in(up, UART_RX); - flag = TTY_NORMAL; - up->port.icount.rx++; - - if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { - - dev_warn(up->dev, "We really rush into ERR/BI case" - "status = 0x%02x", *status); - /* For statistics only */ - if (*status & UART_LSR_BI) { - *status &= ~(UART_LSR_FE | UART_LSR_PE); - up->port.icount.brk++; - /* - * We do the SysRQ and SAK checking - * here because otherwise the break - * may get masked by ignore_status_mask - * or read_status_mask. - */ - if (uart_handle_break(&up->port)) - goto ignore_char; - } else if (*status & UART_LSR_PE) - up->port.icount.parity++; - else if (*status & UART_LSR_FE) - up->port.icount.frame++; - if (*status & UART_LSR_OE) - up->port.icount.overrun++; - - /* Mask off conditions which should be ignored. 
*/ - *status &= up->port.read_status_mask; - -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - if (up->port.cons && - up->port.cons->index == up->port.line) { - /* Recover the break flag from console xmit */ - *status |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif - if (*status & UART_LSR_BI) { - flag = TTY_BREAK; - } else if (*status & UART_LSR_PE) - flag = TTY_PARITY; - else if (*status & UART_LSR_FE) - flag = TTY_FRAME; - } - - if (uart_handle_sysrq_char(&up->port, ch)) - goto ignore_char; - - uart_insert_char(&up->port, *status, UART_LSR_OE, ch, flag); - ignore_char: - *status = serial_in(up, UART_LSR); - } while ((*status & UART_LSR_DR) && max_count--); - - spin_unlock_irqrestore(&up->port.lock, *flags); - tty_flip_buffer_push(&up->port.state->port); - spin_lock_irqsave(&up->port.lock, *flags); -} - -static void transmit_chars(struct uart_hsu_port *up) -{ - struct circ_buf *xmit = &up->port.state->xmit; - int count; - - if (up->port.x_char) { - serial_out(up, UART_TX, up->port.x_char); - up->port.icount.tx++; - up->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { - serial_hsu_stop_tx(&up->port); - return; - } - - /* The IRQ is for TX FIFO half-empty */ - count = up->port.fifosize / 2; - - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - - up->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); - - if (uart_circ_empty(xmit)) - serial_hsu_stop_tx(&up->port); -} - -static inline void check_modem_status(struct uart_hsu_port *up) -{ - int status; - - status = serial_in(up, UART_MSR); - - if ((status & UART_MSR_ANY_DELTA) == 0) - return; - - if (status & UART_MSR_TERI) - up->port.icount.rng++; - if (status & UART_MSR_DDSR) - up->port.icount.dsr++; - /* We may only get DDCD when HW init and reset */ - if (status & UART_MSR_DDCD) - uart_handle_dcd_change(&up->port, status & UART_MSR_DCD); - /* Will start/stop_tx accordingly */ - if (status & UART_MSR_DCTS) - uart_handle_cts_change(&up->port, status & UART_MSR_CTS); - - wake_up_interruptible(&up->port.state->port.delta_msr_wait); -} - -/* - * This handles the interrupt from one port. 
- */ -static irqreturn_t port_irq(int irq, void *dev_id) -{ - struct uart_hsu_port *up = dev_id; - unsigned int iir, lsr; - unsigned long flags; - - if (unlikely(!up->running)) - return IRQ_NONE; - - spin_lock_irqsave(&up->port.lock, flags); - if (up->use_dma) { - lsr = serial_in(up, UART_LSR); - if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) - dev_warn(up->dev, - "Got lsr irq while using DMA, lsr = 0x%2x\n", - lsr); - check_modem_status(up); - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_HANDLED; - } - - iir = serial_in(up, UART_IIR); - if (iir & UART_IIR_NO_INT) { - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_NONE; - } - - lsr = serial_in(up, UART_LSR); - if (lsr & UART_LSR_DR) - receive_chars(up, &lsr, &flags); - check_modem_status(up); - - /* lsr will be renewed during the receive_chars */ - if (lsr & UART_LSR_THRE) - transmit_chars(up); - - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_HANDLED; -} - -static inline void dma_chan_irq(struct hsu_dma_chan *chan) -{ - struct uart_hsu_port *up = chan->uport; - unsigned long flags; - u32 int_sts; - - spin_lock_irqsave(&up->port.lock, flags); - - if (!up->use_dma || !up->running) - goto exit; - - /* - * No matter what situation, need read clear the IRQ status - * There is a bug, see Errata 5, HSD 2900918 - */ - int_sts = chan_readl(chan, HSU_CH_SR); - - /* Rx channel */ - if (chan->dirt == DMA_FROM_DEVICE) - hsu_dma_rx(up, int_sts, &flags); - - /* Tx channel */ - if (chan->dirt == DMA_TO_DEVICE) { - chan_writel(chan, HSU_CH_CR, 0x0); - up->dma_tx_on = 0; - hsu_dma_tx(up); - } - -exit: - spin_unlock_irqrestore(&up->port.lock, flags); - return; -} - -static irqreturn_t dma_irq(int irq, void *dev_id) -{ - struct hsu_port *hsu = dev_id; - u32 int_sts, i; - - int_sts = mfd_readl(hsu, HSU_GBL_DMAISR); - - /* Currently we only have 6 channels may be used */ - for (i = 0; i < 6; i++) { - if (int_sts & 0x1) - dma_chan_irq(&hsu->chans[i]); - int_sts >>= 1; - } - - return IRQ_HANDLED; -} - -static unsigned int serial_hsu_tx_empty(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - unsigned int ret; - - spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? 
TIOCSER_TEMT : 0; - spin_unlock_irqrestore(&up->port.lock, flags); - - return ret; -} - -static unsigned int serial_hsu_get_mctrl(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char status; - unsigned int ret; - - status = serial_in(up, UART_MSR); - - ret = 0; - if (status & UART_MSR_DCD) - ret |= TIOCM_CAR; - if (status & UART_MSR_RI) - ret |= TIOCM_RNG; - if (status & UART_MSR_DSR) - ret |= TIOCM_DSR; - if (status & UART_MSR_CTS) - ret |= TIOCM_CTS; - return ret; -} - -static void serial_hsu_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char mcr = 0; - - if (mctrl & TIOCM_RTS) - mcr |= UART_MCR_RTS; - if (mctrl & TIOCM_DTR) - mcr |= UART_MCR_DTR; - if (mctrl & TIOCM_OUT1) - mcr |= UART_MCR_OUT1; - if (mctrl & TIOCM_OUT2) - mcr |= UART_MCR_OUT2; - if (mctrl & TIOCM_LOOP) - mcr |= UART_MCR_LOOP; - - mcr |= up->mcr; - - serial_out(up, UART_MCR, mcr); -} - -static void serial_hsu_break_ctl(struct uart_port *port, int break_state) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); - if (break_state == -1) - up->lcr |= UART_LCR_SBC; - else - up->lcr &= ~UART_LCR_SBC; - serial_out(up, UART_LCR, up->lcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -/* - * What special to do: - * 1. chose the 64B fifo mode - * 2. start dma or pio depends on configuration - * 3. we only allocate dma memory when needed - */ -static int serial_hsu_startup(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - pm_runtime_get_sync(up->dev); - - /* - * Clear the FIFO buffers and disable them. - * (they will be reenabled in set_termios()) - */ - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(up, UART_FCR, 0); - - /* Clear the interrupt registers. */ - (void) serial_in(up, UART_LSR); - (void) serial_in(up, UART_RX); - (void) serial_in(up, UART_IIR); - (void) serial_in(up, UART_MSR); - - /* Now, initialize the UART, default is 8n1 */ - serial_out(up, UART_LCR, UART_LCR_WLEN8); - - spin_lock_irqsave(&up->port.lock, flags); - - up->port.mctrl |= TIOCM_OUT2; - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - - /* - * Finally, enable interrupts. Note: Modem status interrupts - * are set via set_termios(), which will be occurring imminently - * anyway, so we don't enable them here. 
- */ - if (!up->use_dma) - up->ier = UART_IER_RLSI | UART_IER_RDI | UART_IER_RTOIE; - else - up->ier = 0; - serial_out(up, UART_IER, up->ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - - /* DMA init */ - if (up->use_dma) { - struct hsu_dma_buffer *dbuf; - struct circ_buf *xmit = &port->state->xmit; - - up->dma_tx_on = 0; - - /* First allocate the RX buffer */ - dbuf = &up->rxbuf; - dbuf->buf = kzalloc(HSU_DMA_BUF_SIZE, GFP_KERNEL); - if (!dbuf->buf) { - up->use_dma = 0; - goto exit; - } - dbuf->dma_addr = dma_map_single(port->dev, - dbuf->buf, - HSU_DMA_BUF_SIZE, - DMA_FROM_DEVICE); - dbuf->dma_size = HSU_DMA_BUF_SIZE; - - /* Start the RX channel right now */ - hsu_dma_start_rx_chan(up->rxc, dbuf); - - /* Next init the TX DMA */ - dbuf = &up->txbuf; - dbuf->buf = xmit->buf; - dbuf->dma_addr = dma_map_single(port->dev, - dbuf->buf, - UART_XMIT_SIZE, - DMA_TO_DEVICE); - dbuf->dma_size = UART_XMIT_SIZE; - - /* This should not be changed all around */ - chan_writel(up->txc, HSU_CH_BSR, 32); - chan_writel(up->txc, HSU_CH_MOTSR, 4); - dbuf->ofs = 0; - } - -exit: - /* And clear the interrupt registers again for luck. */ - (void) serial_in(up, UART_LSR); - (void) serial_in(up, UART_RX); - (void) serial_in(up, UART_IIR); - (void) serial_in(up, UART_MSR); - - up->running = 1; - return 0; -} - -static void serial_hsu_shutdown(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - /* Disable interrupts from this port */ - up->ier = 0; - serial_out(up, UART_IER, 0); - up->running = 0; - - spin_lock_irqsave(&up->port.lock, flags); - up->port.mctrl &= ~TIOCM_OUT2; - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - spin_unlock_irqrestore(&up->port.lock, flags); - - /* Disable break condition and FIFOs */ - serial_out(up, UART_LCR, serial_in(up, UART_LCR) & ~UART_LCR_SBC); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT); - serial_out(up, UART_FCR, 0); - - pm_runtime_put(up->dev); -} - -static void -serial_hsu_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char cval, fcr = 0; - unsigned long flags; - unsigned int baud, quot; - u32 ps, mul; - - switch (termios->c_cflag & CSIZE) { - case CS5: - cval = UART_LCR_WLEN5; - break; - case CS6: - cval = UART_LCR_WLEN6; - break; - case CS7: - cval = UART_LCR_WLEN7; - break; - default: - case CS8: - cval = UART_LCR_WLEN8; - break; - } - - /* CMSPAR isn't supported by this driver */ - termios->c_cflag &= ~CMSPAR; - - if (termios->c_cflag & CSTOPB) - cval |= UART_LCR_STOP; - if (termios->c_cflag & PARENB) - cval |= UART_LCR_PARITY; - if (!(termios->c_cflag & PARODD)) - cval |= UART_LCR_EPAR; - - /* - * The base clk is 50Mhz, and the baud rate come from: - * baud = 50M * MUL / (DIV * PS * DLAB) - * - * For those basic low baud rate we can get the direct - * scalar from 2746800, like 115200 = 2746800/24. 
For those - * higher baud rate, we handle them case by case, mainly by - * adjusting the MUL/PS registers, and DIV register is kept - * as default value 0x3d09 to make things simple - */ - baud = uart_get_baud_rate(port, termios, old, 0, 4000000); - - quot = 1; - ps = 0x10; - mul = 0x3600; - switch (baud) { - case 3500000: - mul = 0x3345; - ps = 0xC; - break; - case 1843200: - mul = 0x2400; - break; - case 3000000: - case 2500000: - case 2000000: - case 1500000: - case 1000000: - case 500000: - /* mul/ps/quot = 0x9C4/0x10/0x1 will make a 500000 bps */ - mul = baud / 500000 * 0x9C4; - break; - default: - /* Use uart_get_divisor to get quot for other baud rates */ - quot = 0; - } - - if (!quot) - quot = uart_get_divisor(port, baud); - - if ((up->port.uartclk / quot) < (2400 * 16)) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_1B; - else if ((up->port.uartclk / quot) < (230400 * 16)) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_16B; - else - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_32B; - - fcr |= UART_FCR_HSU_64B_FIFO; - - /* - * Ok, we're now changing the port state. Do it with - * interrupts disabled. - */ - spin_lock_irqsave(&up->port.lock, flags); - - /* Update the per-port timeout */ - uart_update_timeout(port, termios->c_cflag, baud); - - up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; - if (termios->c_iflag & INPCK) - up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - up->port.read_status_mask |= UART_LSR_BI; - - /* Characters to ignore */ - up->port.ignore_status_mask = 0; - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; - if (termios->c_iflag & IGNBRK) { - up->port.ignore_status_mask |= UART_LSR_BI; - /* - * If we're ignoring parity and break indicators, - * ignore overruns too (for real raw support). 
- */ - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_OE; - } - - /* Ignore all characters if CREAD is not set */ - if ((termios->c_cflag & CREAD) == 0) - up->port.ignore_status_mask |= UART_LSR_DR; - - /* - * CTS flow control flag and modem status interrupts, disable - * MSI by default - */ - up->ier &= ~UART_IER_MSI; - if (UART_ENABLE_MS(&up->port, termios->c_cflag)) - up->ier |= UART_IER_MSI; - - serial_out(up, UART_IER, up->ier); - - if (termios->c_cflag & CRTSCTS) - up->mcr |= UART_MCR_AFE | UART_MCR_RTS; - else - up->mcr &= ~UART_MCR_AFE; - - serial_out(up, UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ - serial_out(up, UART_DLL, quot & 0xff); /* LS of divisor */ - serial_out(up, UART_DLM, quot >> 8); /* MS of divisor */ - serial_out(up, UART_LCR, cval); /* reset DLAB */ - serial_out(up, UART_MUL, mul); /* set MUL */ - serial_out(up, UART_PS, ps); /* set PS */ - up->lcr = cval; /* Save LCR */ - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - serial_out(up, UART_FCR, fcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -static void -serial_hsu_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ -} - -static void serial_hsu_release_port(struct uart_port *port) -{ -} - -static int serial_hsu_request_port(struct uart_port *port) -{ - return 0; -} - -static void serial_hsu_config_port(struct uart_port *port, int flags) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - up->port.type = PORT_MFD; -} - -static int -serial_hsu_verify_port(struct uart_port *port, struct serial_struct *ser) -{ - /* We don't want the core code to modify any port params */ - return -EINVAL; -} - -static const char * -serial_hsu_type(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - return up->name; -} - -/* Mainly for uart console use */ -static struct uart_hsu_port *serial_hsu_ports[3]; -static struct uart_driver serial_hsu_reg; - -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -/* Wait for transmitter & holding register to empty */ -static inline void wait_for_xmitr(struct uart_hsu_port *up) -{ - unsigned int status, tmout = 1000; - - /* Wait up to 1ms for the character to be sent. */ - do { - status = serial_in(up, UART_LSR); - - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; - - if (--tmout == 0) - break; - udelay(1); - } while (!(status & BOTH_EMPTY)); - - /* Wait up to 1s for flow control if necessary */ - if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (--tmout && - ((serial_in(up, UART_MSR) & UART_MSR_CTS) == 0)) - udelay(1); - } -} - -static void serial_hsu_console_putchar(struct uart_port *port, int ch) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - wait_for_xmitr(up); - serial_out(up, UART_TX, ch); -} - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... - * - * The console_lock must be held when we get here. 
- */ -static void -serial_hsu_console_write(struct console *co, const char *s, unsigned int count) -{ - struct uart_hsu_port *up = serial_hsu_ports[co->index]; - unsigned long flags; - unsigned int ier; - int locked = 1; - - touch_nmi_watchdog(); - - local_irq_save(flags); - if (up->port.sysrq) - locked = 0; - else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); - - /* First save the IER then disable the interrupts */ - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); - - uart_console_write(&up->port, s, count, serial_hsu_console_putchar); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up); - serial_out(up, UART_IER, ier); - - if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); -} - -static struct console serial_hsu_console; - -static int __init -serial_hsu_console_setup(struct console *co, char *options) -{ - struct uart_hsu_port *up; - int baud = 115200; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - - if (co->index == -1 || co->index >= serial_hsu_reg.nr) - co->index = 0; - up = serial_hsu_ports[co->index]; - if (!up) - return -ENODEV; - - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - - return uart_set_options(&up->port, co, baud, parity, bits, flow); -} - -static struct console serial_hsu_console = { - .name = "ttyMFD", - .write = serial_hsu_console_write, - .device = uart_console_device, - .setup = serial_hsu_console_setup, - .flags = CON_PRINTBUFFER, - .index = -1, - .data = &serial_hsu_reg, -}; - -#define SERIAL_HSU_CONSOLE (&serial_hsu_console) -#else -#define SERIAL_HSU_CONSOLE NULL -#endif - -static struct uart_ops serial_hsu_pops = { - .tx_empty = serial_hsu_tx_empty, - .set_mctrl = serial_hsu_set_mctrl, - .get_mctrl = serial_hsu_get_mctrl, - .stop_tx = serial_hsu_stop_tx, - .start_tx = serial_hsu_start_tx, - .stop_rx = serial_hsu_stop_rx, - .enable_ms = serial_hsu_enable_ms, - .break_ctl = serial_hsu_break_ctl, - .startup = serial_hsu_startup, - .shutdown = serial_hsu_shutdown, - .set_termios = serial_hsu_set_termios, - .pm = serial_hsu_pm, - .type = serial_hsu_type, - .release_port = serial_hsu_release_port, - .request_port = serial_hsu_request_port, - .config_port = serial_hsu_config_port, - .verify_port = serial_hsu_verify_port, -}; - -static struct uart_driver serial_hsu_reg = { - .owner = THIS_MODULE, - .driver_name = "MFD serial", - .dev_name = "ttyMFD", - .major = TTY_MAJOR, - .minor = 128, - .nr = 3, - .cons = SERIAL_HSU_CONSOLE, -}; - -#ifdef CONFIG_PM -static int serial_hsu_suspend(struct pci_dev *pdev, pm_message_t state) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - - /* Make sure this is not the internal dma controller */ - if (priv && (pdev->device != 0x081E)) { - up = priv; - uart_suspend_port(&serial_hsu_reg, &up->port); - } - - pci_save_state(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; -} - -static int serial_hsu_resume(struct pci_dev *pdev) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - int ret; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - ret = pci_enable_device(pdev); - if (ret) - dev_warn(&pdev->dev, - "HSU: can't re-enable device, try to continue\n"); - - if (priv && (pdev->device != 0x081E)) { - up = priv; - uart_resume_port(&serial_hsu_reg, &up->port); - } - return 0; -} - -static int serial_hsu_runtime_idle(struct device *dev) -{ - pm_schedule_suspend(dev, 500); - 
return -EBUSY; -} - -static int serial_hsu_runtime_suspend(struct device *dev) -{ - return 0; -} - -static int serial_hsu_runtime_resume(struct device *dev) -{ - return 0; -} -#else -#define serial_hsu_suspend NULL -#define serial_hsu_resume NULL -#define serial_hsu_runtime_idle NULL -#define serial_hsu_runtime_suspend NULL -#define serial_hsu_runtime_resume NULL -#endif - -static const struct dev_pm_ops serial_hsu_pm_ops = { - .runtime_suspend = serial_hsu_runtime_suspend, - .runtime_resume = serial_hsu_runtime_resume, - .runtime_idle = serial_hsu_runtime_idle, -}; - -/* temp global pointer before we settle down on using one or four PCI dev */ -static struct hsu_port *phsu; - -static int serial_hsu_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct uart_hsu_port *uport; - int index, ret; - - printk(KERN_INFO "HSU: found PCI Serial controller(ID: %04x:%04x)\n", - pdev->vendor, pdev->device); - - switch (pdev->device) { - case 0x081B: - index = 0; - break; - case 0x081C: - index = 1; - break; - case 0x081D: - index = 2; - break; - case 0x081E: - /* internal DMA controller */ - index = 3; - break; - default: - dev_err(&pdev->dev, "HSU: out of index!"); - return -ENODEV; - } - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - if (index == 3) { - /* DMA controller */ - ret = request_irq(pdev->irq, dma_irq, 0, "hsu_dma", phsu); - if (ret) { - dev_err(&pdev->dev, "can not get IRQ\n"); - goto err_disable; - } - pci_set_drvdata(pdev, phsu); - } else { - /* UART port 0~2 */ - uport = &phsu->port[index]; - uport->port.irq = pdev->irq; - uport->port.dev = &pdev->dev; - uport->dev = &pdev->dev; - - ret = request_irq(pdev->irq, port_irq, 0, uport->name, uport); - if (ret) { - dev_err(&pdev->dev, "can not get IRQ\n"); - goto err_disable; - } - uart_add_one_port(&serial_hsu_reg, &uport->port); - - pci_set_drvdata(pdev, uport); - } - - pm_runtime_put_noidle(&pdev->dev); - pm_runtime_allow(&pdev->dev); - - return 0; - -err_disable: - pci_disable_device(pdev); - return ret; -} - -static void hsu_global_init(void) -{ - struct hsu_port *hsu; - struct uart_hsu_port *uport; - struct hsu_dma_chan *dchan; - int i, ret; - - hsu = kzalloc(sizeof(struct hsu_port), GFP_KERNEL); - if (!hsu) - return; - - /* Get basic io resource and map it */ - hsu->paddr = 0xffa28000; - hsu->iolen = 0x1000; - - if (!(request_mem_region(hsu->paddr, hsu->iolen, "HSU global"))) - pr_warn("HSU: error in request mem region\n"); - - hsu->reg = ioremap_nocache((unsigned long)hsu->paddr, hsu->iolen); - if (!hsu->reg) { - pr_err("HSU: error in ioremap\n"); - ret = -ENOMEM; - goto err_free_region; - } - - /* Initialise the 3 UART ports */ - uport = hsu->port; - for (i = 0; i < 3; i++) { - uport->port.type = PORT_MFD; - uport->port.iotype = UPIO_MEM; - uport->port.mapbase = (resource_size_t)hsu->paddr - + HSU_PORT_REG_OFFSET - + i * HSU_PORT_REG_LENGTH; - uport->port.membase = hsu->reg + HSU_PORT_REG_OFFSET - + i * HSU_PORT_REG_LENGTH; - - sprintf(uport->name, "hsu_port%d", i); - uport->port.fifosize = 64; - uport->port.ops = &serial_hsu_pops; - uport->port.line = i; - uport->port.flags = UPF_IOREMAP; - /* set the scalable maxim support rate to 2746800 bps */ - uport->port.uartclk = 115200 * 24 * 16; - - uport->running = 0; - uport->txc = &hsu->chans[i * 2]; - uport->rxc = &hsu->chans[i * 2 + 1]; - - serial_hsu_ports[i] = uport; - uport->index = i; - - if (hsu_dma_enable & (1<<i)) - uport->use_dma = 1; - else - uport->use_dma = 0; - - uport++; - } - - /* Initialise 6 dma channels */ - dchan = hsu->chans; - for (i = 0; i <
6; i++) { - dchan->id = i; - dchan->dirt = (i & 0x1) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - dchan->uport = &hsu->port[i/2]; - dchan->reg = hsu->reg + HSU_DMA_CHANS_REG_OFFSET + - i * HSU_DMA_CHANS_REG_LENGTH; - - dchan++; - } - - phsu = hsu; - hsu_debugfs_init(hsu); - return; - -err_free_region: - release_mem_region(hsu->paddr, hsu->iolen); - kfree(hsu); - return; -} - -static void serial_hsu_remove(struct pci_dev *pdev) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - - if (!priv) - return; - - pm_runtime_forbid(&pdev->dev); - pm_runtime_get_noresume(&pdev->dev); - - /* For port 0/1/2, priv is the address of uart_hsu_port */ - if (pdev->device != 0x081E) { - up = priv; - uart_remove_one_port(&serial_hsu_reg, &up->port); - } - - free_irq(pdev->irq, priv); - pci_disable_device(pdev); -} - -/* First 3 are UART ports, and the 4th is the DMA */ -static const struct pci_device_id pci_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081B) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081C) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081D) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081E) }, - {}, -}; - -static struct pci_driver hsu_pci_driver = { - .name = "HSU serial", - .id_table = pci_ids, - .probe = serial_hsu_probe, - .remove = serial_hsu_remove, - .suspend = serial_hsu_suspend, - .resume = serial_hsu_resume, - .driver = { - .pm = &serial_hsu_pm_ops, - }, -}; - -static int __init hsu_pci_init(void) -{ - int ret; - - hsu_global_init(); - - ret = uart_register_driver(&serial_hsu_reg); - if (ret) - return ret; - - return pci_register_driver(&hsu_pci_driver); -} - -static void __exit hsu_pci_exit(void) -{ - pci_unregister_driver(&hsu_pci_driver); - uart_unregister_driver(&serial_hsu_reg); - - hsu_debugfs_remove(phsu); - - kfree(phsu); -} - -module_init(hsu_pci_init); -module_exit(hsu_pci_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, pci_ids); diff --git a/include/linux/serial_mfd.h b/include/linux/serial_mfd.h deleted file mode 100644 index 2b071e0b034d..000000000000 --- a/include/linux/serial_mfd.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _SERIAL_MFD_H_ -#define _SERIAL_MFD_H_ - -/* HW register offset definition */ -#define UART_FOR 0x08 -#define UART_PS 0x0C -#define UART_MUL 0x0D -#define UART_DIV 0x0E - -#define HSU_GBL_IEN 0x0 -#define HSU_GBL_IST 0x4 - -#define HSU_GBL_INT_BIT_PORT0 0x0 -#define HSU_GBL_INT_BIT_PORT1 0x1 -#define HSU_GBL_INT_BIT_PORT2 0x2 -#define HSU_GBL_INT_BIT_IRI 0x3 -#define HSU_GBL_INT_BIT_HDLC 0x4 -#define HSU_GBL_INT_BIT_DMA 0x5 - -#define HSU_GBL_ISR 0x8 -#define HSU_GBL_DMASR 0x400 -#define HSU_GBL_DMAISR 0x404 - -#define HSU_PORT_REG_OFFSET 0x80 -#define HSU_PORT0_REG_OFFSET 0x80 -#define HSU_PORT1_REG_OFFSET 0x100 -#define HSU_PORT2_REG_OFFSET 0x180 -#define HSU_PORT_REG_LENGTH 0x80 - -#define HSU_DMA_CHANS_REG_OFFSET 0x500 -#define HSU_DMA_CHANS_REG_LENGTH 0x40 - -#define HSU_CH_SR 0x0 /* channel status reg */ -#define HSU_CH_CR 0x4 /* control reg */ -#define HSU_CH_DCR 0x8 /* descriptor control reg */ -#define HSU_CH_BSR 0x10 /* max fifo buffer size reg */ -#define HSU_CH_MOTSR 0x14 /* minimum ocp transfer size */ -#define HSU_CH_D0SAR 0x20 /* desc 0 start addr */ -#define HSU_CH_D0TSR 0x24 /* desc 0 transfer size */ -#define HSU_CH_D1SAR 0x28 -#define HSU_CH_D1TSR 0x2C -#define HSU_CH_D2SAR 0x30 -#define HSU_CH_D2TSR 0x34 -#define HSU_CH_D3SAR 0x38 -#define HSU_CH_D3TSR 0x3C - -#endif diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 00adb01fa5f3..e9b4cb0cd7ed 100644 --- 
a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -241,25 +241,6 @@ #define UART_FCR_PXAR16 0x80 /* receive FIFO threshold = 16 */ #define UART_FCR_PXAR32 0xc0 /* receive FIFO threshold = 32 */ -/* - * Intel MID on-chip HSU (High Speed UART) defined bits - */ -#define UART_FCR_HSU_64_1B 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_HSU_64_16B 0x40 /* receive FIFO treshold = 16 */ -#define UART_FCR_HSU_64_32B 0x80 /* receive FIFO treshold = 32 */ -#define UART_FCR_HSU_64_56B 0xc0 /* receive FIFO treshold = 56 */ - -#define UART_FCR_HSU_16_1B 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_HSU_16_4B 0x40 /* receive FIFO treshold = 4 */ -#define UART_FCR_HSU_16_8B 0x80 /* receive FIFO treshold = 8 */ -#define UART_FCR_HSU_16_14B 0xc0 /* receive FIFO treshold = 14 */ - -#define UART_FCR_HSU_64B_FIFO 0x20 /* chose 64 bytes FIFO */ -#define UART_FCR_HSU_16B_FIFO 0x00 /* chose 16 bytes FIFO */ - -#define UART_FCR_HALF_EMPT_TXI 0x00 /* trigger TX_EMPT IRQ for half empty */ -#define UART_FCR_FULL_EMPT_TXI 0x08 /* trigger TX_EMPT IRQ for full empty */ - /* * These register definitions are for the 16C950 */ -- cgit v1.2.3 From 079119a2c7c83506ad753e7b95e9ed253e9963f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Mar 2015 13:52:53 +0200 Subject: serial, x86: use UPF_* constants for flags This patch fixes the following sparse warnings: drivers/tty/serial/8250/8250_core.c:3231:32: warning: incorrect type in assignment (different base types) drivers/tty/serial/8250/8250_core.c:3231:32: expected restricted upf_t [usertype] flags drivers/tty/serial/8250/8250_core.c:3231:32: got unsigned int const [unsigned] flags Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/serial.h | 8 ++++---- drivers/tty/serial/8250/8250.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h index 460b84f64556..8378b8c9109c 100644 --- a/arch/x86/include/asm/serial.h +++ b/arch/x86/include/asm/serial.h @@ -12,11 +12,11 @@ /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_DETECT_IRQ -# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | ASYNC_AUTO_IRQ) +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | UPF_AUTO_IRQ) #else -# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | 0 ) -# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | 0 ) +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | 0 ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | 0 ) #endif #define SERIAL_PORT_DFNS \ diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index b00836851061..656ecc60b5b2 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -53,7 +53,7 @@ struct old_serial_port { unsigned int baud_base; unsigned int port; unsigned int irq; - unsigned int flags; + upf_t flags; unsigned char hub6; unsigned char io_type; unsigned char __iomem *iomem_base; -- cgit v1.2.3 From 88ad1a147e2c84d33cb50f5ebff1ece5e0cd4383 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Mar 2015 14:18:12 +1030 Subject: lguest: fix pending interrupt test. Denys says: TEST with zero will always set ZF. Thus, "jnz send_interrupts" never jumps. We get interrupts regularly enough that this didn't cause immediate problems. 
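To see why the testl version can never branch, here is a minimal user-space C sketch (an illustration only, not part of the patch; the helper names are invented) of how the two instructions derive ZF:

#include <stdio.h>

/* Invented helpers modelling how the two x86 instructions set ZF;
 * the real code is the assembly in head_32.S below. */
static int zf_after_testl(unsigned int imm, unsigned int mem)
{
	return (mem & imm) == 0;	/* TEST: ZF set iff (mem AND imm) == 0 */
}

static int zf_after_cmpl(unsigned int imm, unsigned int mem)
{
	return (mem - imm) == 0;	/* CMP: ZF set iff (mem - imm) == 0 */
}

int main(void)
{
	/* Suppose an interrupt is pending: irq_pending == X86_EFLAGS_IF. */
	unsigned int irq_pending = 0x200;

	/* testl $0, mem: mem & 0 is always 0, so ZF=1 and "jnz" never jumps. */
	printf("testl $0: ZF=%d\n", zf_after_testl(0, irq_pending));

	/* cmpl $0, mem: ZF reflects whether mem is 0, so "jnz" jumps here. */
	printf("cmpl  $0: ZF=%d\n", zf_after_cmpl(0, irq_pending));
	return 0;
}

With testl, ANDing against the zero immediate always yields zero, so ZF is always set and the pending-interrupt path is unreachable; cmpl subtracts the zero immediate, so ZF is clear exactly when irq_pending is non-zero.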
Reported-by: Denys Vlasenko Signed-off-by: Rusty Russell --- arch/x86/lguest/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 6ddfe4fc23c3..05b0a85507ce 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -84,7 +84,7 @@ ENTRY(lg_irq_enable) * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we * jump to send_interrupts, otherwise we're done. */ - testl $0, lguest_data+LGUEST_DATA_irq_pending + cmpl $0, lguest_data+LGUEST_DATA_irq_pending jnz send_interrupts /* * One cool thing about x86 is that you can do many things without using -- cgit v1.2.3 From 41f055d49cb04d648a89a2cb6d57c94ff86d5bb6 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Tue, 24 Mar 2015 11:51:38 +1030 Subject: lguest: rename i386_head.S in the comments i386_head.S was renamed to head_32.S, so let's update the comments to match. Signed-off-by: Alexander Kuleshov Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ac4453d8520e..543510a2f9e0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -262,7 +262,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl); PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable); /*:*/ -/* These are in i386_head.S */ +/* These are in head_32.S */ extern void lg_irq_enable(void); extern void lg_restore_fl(unsigned long flags); @@ -1366,7 +1366,7 @@ static void lguest_restart(char *reason) * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, - * and these are in i386_head.S. + * and these are in head_32.S. */ /*G:060 We construct a table from the assembler templates: */ -- cgit v1.2.3 From 7042cb4eb30967b5eb9eeba04907882f04d6b6e5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 24 Mar 2015 11:51:39 +1030 Subject: lguest: simplify lguest_iret Signed-off-by: Denys Vlasenko CC: lguest@lists.ozlabs.org CC: x86@kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Rusty Russell --- arch/x86/lguest/head_32.S | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 05b0a85507ce..81678bf0fcb7 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -168,29 +168,28 @@ ENTRY(lg_restore_fl) * So we have to copy eflags from the stack to lguest_data.irq_enabled before * we do the "iret". * - * There are two problems with this: firstly, we need to use a register to do - * the copy and secondly, the whole thing needs to be atomic. The first - * problem is easy to solve: push %eax on the stack so we can use it, and then - * restore it at the end just before the real "iret". + * There are two problems with this: firstly, we can't clobber any registers + * and secondly, the whole thing needs to be atomic. The first problem + * is solved by using "push memory"/"pop memory" instruction pair for copying. * * The second is harder: copying eflags to lguest_data.irq_enabled will turn * interrupts on before we're finished, so we could be interrupted before we * return to userspace or wherever. Our solution to this is to surround the * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be - * enabled.
(It's not necessary to protect pop instruction, since + * data gets updated only after it completes, so we end up surrounding + * just one instruction, iret). */ ENTRY(lguest_iret) - pushl %eax - movl 12(%esp), %eax -lguest_noirq_start: + pushl 2*4(%esp) /* * Note the %ss: segment prefix here. Normal data accesses use the * "ds" segment, but that will have already been restored for whatever * we're returning to (such as userspace): we can't trust it. The %ss: * prefix makes sure we use the stack segment, which is still valid. */ - movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled - popl %eax + popl %ss:lguest_data+LGUEST_DATA_irq_enabled +lguest_noirq_start: iret lguest_noirq_end: -- cgit v1.2.3 From 2f921b5bb0511fb698681d8ef35c48be7a9116bf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 24 Mar 2015 11:51:39 +1030 Subject: lguest: suppress interrupts for single insn, not range. The last patch reduced our interrupt-suppression region to one address, so simplify the code somewhat. Also, remove the obsolete undefined instruction ranges and the comment which refers to lguest_guest.S instead of head_32.S. Signed-off-by: Rusty Russell --- arch/x86/include/asm/lguest.h | 7 ++----- arch/x86/lguest/boot.c | 3 +-- arch/x86/lguest/head_32.S | 15 ++++++--------- drivers/lguest/hypercalls.c | 5 ++--- drivers/lguest/interrupts_and_traps.c | 8 ++++---- drivers/lguest/lg.h | 2 +- include/linux/lguest.h | 4 ++-- 7 files changed, 18 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index e2d4a4afa8c3..3bbc07a57a31 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -20,13 +20,10 @@ extern unsigned long switcher_addr; /* Found in switcher.S */ extern unsigned long default_idt_entries[]; -/* Declarations for definitions in lguest_guest.S */ -extern char lguest_noirq_start[], lguest_noirq_end[]; +/* Declarations for definitions in arch/x86/lguest/head_32.S */ +extern char lguest_noirq_iret[]; extern const char lgstart_cli[], lgend_cli[]; -extern const char lgstart_sti[], lgend_sti[]; -extern const char lgstart_popf[], lgend_popf[]; extern const char lgstart_pushf[], lgend_pushf[]; -extern const char lgstart_iret[], lgend_iret[]; extern void lguest_iret(void); extern void lguest_init(void); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 543510a2f9e0..13616d708389 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -87,8 +87,7 @@ struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, - .noirq_start = (u32)lguest_noirq_start, - .noirq_end = (u32)lguest_noirq_end, + .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ .syscall_vec = SYSCALL_VECTOR, diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 81678bf0fcb7..d5ae63f5ec5d 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -133,9 +133,8 @@ ENTRY(lg_restore_fl) ret /*:*/ -/* These demark the EIP range where host should never deliver interrupts. */ -.global lguest_noirq_start -.global lguest_noirq_end +/* These demark the EIP where host should never deliver interrupts. 
*/ +.global lguest_noirq_iret /*M:004 * When the Host reflects a trap or injects an interrupt into the Guest, it @@ -174,12 +173,11 @@ ENTRY(lg_restore_fl) * * The second is harder: copying eflags to lguest_data.irq_enabled will turn * interrupts on before we're finished, so we could be interrupted before we - * return to userspace or wherever. Our solution to this is to surround the - * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the + * return to userspace or wherever. Our solution to this is to tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be * enabled. (It's not necessary to protect pop instruction, since - * data gets updated only after it completes, so we end up surrounding - * just one instruction, iret). + * data gets updated only after it completes, so we only need to protect + * one instruction, iret). */ ENTRY(lguest_iret) pushl 2*4(%esp) @@ -190,6 +188,5 @@ ENTRY(lguest_iret) * prefix makes sure we use the stack segment, which is still valid. */ popl %ss:lguest_data+LGUEST_DATA_irq_enabled -lguest_noirq_start: +lguest_noirq_iret: iret -lguest_noirq_end: diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 1219af493c0f..19a32280731d 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -211,10 +211,9 @@ static void initialize(struct lg_cpu *cpu) /* * The Guest tells us where we're not to deliver interrupts by putting - * the range of addresses into "struct lguest_data". + * the instruction address into "struct lguest_data". */ - if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) - || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) + if (get_user(cpu->lg->noirq_iret, &cpu->lg->lguest_data->noirq_iret)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 70dfcdc29f1f..6d4c072b61e1 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -204,8 +204,7 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) * They may be in the middle of an iret, where they asked us never to * deliver interrupts. */ - if (cpu->regs->eip >= cpu->lg->noirq_start && - (cpu->regs->eip < cpu->lg->noirq_end)) + if (cpu->regs->eip == cpu->lg->noirq_iret) return; /* If they're halted, interrupts restart them. */ @@ -395,8 +394,9 @@ static bool direct_trap(unsigned int num) * The Guest has the ability to turn its interrupt gates into trap gates, * if it is careful. The Host will let trap gates can go directly to the * Guest, but the Guest needs the interrupts atomically disabled for an - * interrupt gate. It can do this by pointing the trap gate at instructions - * within noirq_start and noirq_end, where it can safely disable interrupts. + * interrupt gate. The Host could provide a mechanism to register more + * "no-interrupt" regions, and the Guest could point the trap gate at + * instructions within that region, where it can safely disable interrupts. 
*/ /*M:006 diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 307e8b39e7d1..ac8ad0461e80 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -102,7 +102,7 @@ struct lguest { struct pgdir pgdirs[4]; - unsigned long noirq_start, noirq_end; + unsigned long noirq_iret; unsigned int stack_pages; u32 tsc_khz; diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 9962c6bb1311..6db19f35f7c5 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -61,8 +61,8 @@ struct lguest_data { u32 tsc_khz; /* Fields initialized by the Guest at boot: */ - /* Instruction range to suppress interrupts even if enabled */ - unsigned long noirq_start, noirq_end; + /* Instruction to suppress interrupts even if enabled */ + unsigned long noirq_iret; /* Address above which page tables are all identical. */ unsigned long kernel_address; /* The vector to try to use for system calls (0x40 or 0x80). */ -- cgit v1.2.3 From 6e0a0ea12962a2175a9f47621f9fe7a4c866cb12 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Tue, 24 Mar 2015 14:02:39 +0000 Subject: ACPI / sleep: Introduce CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT ACPI 5.1 does not currently support S states for ARM64 hardware but ACPI code will call acpi_target_system_state() and acpi_sleep_init() for device power management, so introduce CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT and select it for x86 and ia64 only to make sleep functions available, and also introduce stub function to allow other drivers to function until S states are defined for ARM64. It will be no functional change for x86 and IA64. Suggested-by: Rafael J. Wysocki Acked-by: Lorenzo Pieralisi Acked-by: Rafael J. Wysocki Signed-off-by: Graeme Gregory Signed-off-by: Tomasz Nowicki Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- arch/ia64/Kconfig | 1 + arch/x86/Kconfig | 1 + drivers/acpi/Kconfig | 4 ++++ drivers/acpi/Makefile | 2 +- drivers/acpi/internal.h | 4 ++++ 5 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 074e52bf815c..cc3414fda362 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -10,6 +10,7 @@ config IA64 select ARCH_MIGHT_HAVE_PC_SERIO select PCI if (!IA64_HP_SIM) select ACPI if (!IA64_HP_SIM) + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b7d31ca55187..c3ea9f9a29d1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_FAST_MULTIPLIER diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index e6c3ddd92665..a726381fea72 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -48,9 +48,13 @@ config ACPI_LEGACY_TABLES_LOOKUP config ARCH_MIGHT_HAVE_ACPI_PDC bool +config ACPI_SYSTEM_POWER_STATES_SUPPORT + bool + config ACPI_SLEEP bool depends on SUSPEND || HIBERNATION + depends on ACPI_SYSTEM_POWER_STATES_SUPPORT default y config ACPI_PROCFS_POWER diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 623b117ad1a2..db153c6a75d7 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -23,7 +23,7 @@ acpi-y += nvs.o # Power management related files acpi-y += wakeup.o -acpi-y += sleep.o +acpi-$(CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT) += sleep.o acpi-y += device_pm.o 
acpi-$(CONFIG_ACPI_SLEEP) += proc.o diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 56b321aa2b1c..ba4a61e964be 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -161,7 +161,11 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); /*-------------------------------------------------------------------------- Suspend/Resume -------------------------------------------------------------------------- */ +#ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT extern int acpi_sleep_init(void); +#else +static inline int acpi_sleep_init(void) { return -ENXIO; } +#endif #ifdef CONFIG_ACPI_SLEEP int acpi_sleep_proc_init(void); -- cgit v1.2.3 From 828aef376d7a129547bc4ebb949965040177e3da Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 24 Mar 2015 14:02:46 +0000 Subject: ACPI / processor: Introduce phys_cpuid_t for CPU hardware ID CPU hardware ID (phys_id) is defined as u32 in structure acpi_processor, but phys_id is used as int in acpi processor driver, so it will lead to some inconsistence for the drivers. Furthermore, to cater for ACPI arch ports that implement 64 bits CPU ids a generic CPU physical id type is required. So introduce typedef u32 phys_cpuid_t in a common file, and introduce a macro PHYS_CPUID_INVALID as (phys_cpuid_t)(-1) if it's not defined by other archs, this will solve the inconsistence in acpi processor driver, and will prepare for the ACPI on ARM64 for the 64 bit CPU hardware ID in the following patch. CC: Rafael J Wysocki Suggested-by: Lorenzo Pieralisi Reviewed-by: Grant Likely Acked-by: Sudeep Holla Acked-by: Lorenzo Pieralisi Acked-by: Rafael J. Wysocki Signed-off-by: Catalin Marinas [hj: reworked cpu physid map return codes] Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- arch/ia64/kernel/acpi.c | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 7 ++++--- drivers/acpi/processor_core.c | 30 +++++++++++++++--------------- include/acpi/processor.h | 6 +++--- include/linux/acpi.h | 7 ++++++- 6 files changed, 30 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 2c4498919d3c..067ef4439fa4 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -887,7 +887,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3d525c6124f6..e4f8582eb756 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 1020b1b53a17..58f335ca2e75 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -170,7 +170,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) acpi_status status; int ret; - if (pr->phys_id == -1) + if (pr->phys_id == PHYS_CPUID_INVALID) return -ENODEV; status = acpi_evaluate_integer(pr->handle, "_STA", 
NULL, &sta); @@ -215,7 +215,8 @@ static int acpi_processor_get_info(struct acpi_device *device) union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; struct acpi_processor *pr = acpi_driver_data(device); - int phys_id, cpu_index, device_declaration = 0; + phys_cpuid_t phys_id; + int cpu_index, device_declaration = 0; acpi_status status = AE_OK; static int cpu0_initialized; unsigned long long value; @@ -263,7 +264,7 @@ static int acpi_processor_get_info(struct acpi_device *device) } phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); - if (phys_id < 0) + if (phys_id == PHYS_CPUID_INVALID) acpi_handle_debug(pr->handle, "failed to get CPU physical ID.\n"); pr->phys_id = phys_id; diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 7962651cdbd4..51cc29909e08 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -32,7 +32,7 @@ static struct acpi_table_madt *get_madt_table(void) } static int map_lapic_id(struct acpi_subtable_header *entry, - u32 acpi_id, int *apic_id) + u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_apic *lapic = container_of(entry, struct acpi_madt_local_apic, header); @@ -48,7 +48,7 @@ static int map_lapic_id(struct acpi_subtable_header *entry, } static int map_x2apic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, int *apic_id) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_x2apic *apic = container_of(entry, struct acpi_madt_local_x2apic, header); @@ -65,7 +65,7 @@ static int map_x2apic_id(struct acpi_subtable_header *entry, } static int map_lsapic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, int *apic_id) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_sapic *lsapic = container_of(entry, struct acpi_madt_local_sapic, header); @@ -83,10 +83,10 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, return 0; } -static int map_madt_entry(int type, u32 acpi_id) +static phys_cpuid_t map_madt_entry(int type, u32 acpi_id) { unsigned long madt_end, entry; - int phys_id = -1; /* CPU hardware ID */ + phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */ struct acpi_table_madt *madt; madt = get_madt_table(); @@ -117,12 +117,12 @@ static int map_madt_entry(int type, u32 acpi_id) return phys_id; } -static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) +static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_subtable_header *header; - int phys_id = -1; + phys_cpuid_t phys_id = PHYS_CPUID_INVALID; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) goto exit; @@ -149,27 +149,27 @@ exit: return phys_id; } -int acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) +phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) { - int phys_id; + phys_cpuid_t phys_id; phys_id = map_mat_entry(handle, type, acpi_id); - if (phys_id == -1) + if (phys_id == PHYS_CPUID_INVALID) phys_id = map_madt_entry(type, acpi_id); return phys_id; } -int acpi_map_cpuid(int phys_id, u32 acpi_id) +int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id) { #ifdef CONFIG_SMP int i; #endif - if (phys_id == -1) { + if (phys_id == PHYS_CPUID_INVALID) { /* * On UP processor, there is no _MAT or MADT table. - * So above phys_id is always set to -1. 
+ * So above phys_id is always set to PHYS_CPUID_INVALID. * * BIOS may define multiple CPU handles even for UP processor. * For example, @@ -190,7 +190,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id) if (nr_cpu_ids <= 1 && acpi_id == 0) return acpi_id; else - return phys_id; + return -1; } #ifdef CONFIG_SMP @@ -208,7 +208,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id) int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) { - int phys_id; + phys_cpuid_t phys_id; phys_id = acpi_get_phys_id(handle, type, acpi_id); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index b95dc32a6e6b..4188a4d3b597 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -196,7 +196,7 @@ struct acpi_processor_flags { struct acpi_processor { acpi_handle handle; u32 acpi_id; - u32 phys_id; /* CPU hardware ID such as APIC ID for x86 */ + phys_cpuid_t phys_id; /* CPU hardware ID such as APIC ID for x86 */ u32 id; /* CPU logical ID allocated by OS */ u32 pblk; int performance_platform_limit; @@ -310,8 +310,8 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ /* in processor_core.c */ -int acpi_get_phys_id(acpi_handle, int type, u32 acpi_id); -int acpi_map_cpuid(int phys_id, u32 acpi_id); +phys_cpuid_t acpi_get_phys_id(acpi_handle, int type, u32 acpi_id); +int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id); int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); /* in processor_pdc.c */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 24c7aa8b1d20..6ec33c595aea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -146,9 +146,14 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); void acpi_numa_arch_fixup(void); +#ifndef PHYS_CPUID_INVALID +typedef u32 phys_cpuid_t; +#define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) +#endif + #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu); +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -- cgit v1.2.3 From 9e9c3fe40bcd28e3f98f0ad8408435f4503f2781 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 12 Apr 2015 21:47:15 +0300 Subject: KVM: x86: Fix MSR_IA32_BNDCFGS in msrs_to_save kvm_init_msr_list is currently called before hardware_setup. As a result, vmx_mpx_supported always returns false when kvm_init_msr_list checks whether to save MSR_IA32_BNDCFGS. Move kvm_init_msr_list after vmx_hardware_setup is called to fix this issue. 
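The failure mode here is the classic one of consulting a capability predicate before the code that computes it has run; a minimal standalone sketch of the ordering hazard, with hypothetical names rather than KVM's real symbols:

#include <stdbool.h>
#include <stdio.h>

static bool bndcfgs_supported;            /* filled in by hardware setup */

static void hardware_setup(void)
{
	bndcfgs_supported = true;         /* pretend the probe succeeded */
}

static void init_msr_list(void)
{
	/* Consulting the predicate too early silently drops the MSR. */
	printf("save MSR_IA32_BNDCFGS: %s\n",
	       bndcfgs_supported ? "yes" : "no");
}

int main(void)
{
	init_msr_list();   /* before setup: prints "no" (the bug) */
	hardware_setup();
	init_msr_list();   /* after setup: prints "yes" (the fix) */
	return 0;
}

Run as-is, the first call prints "no" and the second "yes", mirroring how vmx_mpx_supported() can only answer correctly once hardware setup has completed.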
Signed-off-by: Nadav Amit Message-Id: <1428864435-4732-1-git-send-email-namit@cs.technion.ac.il> Cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1a81267f3f6..ed31c31b2485 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5799,7 +5799,6 @@ int kvm_arch_init(void *opaque) kvm_set_mmio_spte_mask(); kvm_x86_ops = ops; - kvm_init_msr_list(); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0); @@ -7253,7 +7252,14 @@ void kvm_arch_hardware_disable(void) int kvm_arch_hardware_setup(void) { - return kvm_x86_ops->hardware_setup(); + int r; + + r = kvm_x86_ops->hardware_setup(); + if (r != 0) + return r; + + kvm_init_msr_list(); + return 0; } void kvm_arch_hardware_unsetup(void) -- cgit v1.2.3 From bea15428b9d6bc36e87288f168ab314619a66757 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Apr 2015 15:40:02 +0200 Subject: KVM: x86: cleanup kvm_irq_delivery_to_apic_fast Sparse is reporting a "we previously assumed 'src' could be null" error. This is true as far as the static analyzer can see, but in practice only IPIs can set shorthand to self and they also set 'src', so it's ok. Still, move the initialization of x2apic_ipi (and thus the NULL check for src right before the first use. While at it, initializing ret to "false" is somewhat confusing because of the almost immediate assigned of "true" to the same variable. Thus, initialize it to "true" and modify it in the only path that used to use the value from "bool ret = false". There is no change in generated code from this change. Reported-by: Dan Carpenter Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d67206a7b99a..629af0f1c5c4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, unsigned long bitmap = 1; struct kvm_lapic **dst; int i; - bool ret = false; - bool x2apic_ipi = src && apic_x2apic_mode(src); + bool ret, x2apic_ipi; *r = -1; @@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (irq->shorthand) return false; + x2apic_ipi = src && apic_x2apic_mode(src); if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) return false; + ret = true; rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (!map) + if (!map) { + ret = false; goto out; - - ret = true; + } if (irq->dest_mode == APIC_DEST_PHYSICAL) { if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) -- cgit v1.2.3 From 4d178f94ebe123d462a51169b53854cb7f198888 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 12 Apr 2015 09:14:45 -0400 Subject: x86/asm: Merge common 32-bit values in asm-offsets.c Merge common values for 32-bit native and compat. 
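For context, OFFSET() entries like the ones being merged here are consumed by the asm-offsets machinery: the C file is compiled to assembly (never assembled), and a kbuild script scrapes the constants out for .S files to use. A rough sketch of the technique, simplified from, and not identical to, the kernel's include/linux/kbuild.h:

#include <stddef.h>

/* Each entry plants a "->SYM value" marker line in the generated .s
 * file; a kbuild script greps those markers and rewrites each one as
 * "#define SYM value" in asm-offsets.h for assembly code to include. */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))
#define BLANK() asm volatile("\n->" : : )

struct example { int a; long b; };

void common_demo(void)
{
	OFFSET(EXAMPLE_b, example, b);	/* "#define EXAMPLE_b 8" on x86-64 */
	BLANK();
}

Because the markers are only ever read back out of the compiler's -S output, the "->" lines never have to be valid assembly.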
Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/1428844486-6638-1-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets.c | 19 +++++++++++++++++++ arch/x86/kernel/asm-offsets_32.c | 15 --------------- arch/x86/kernel/asm-offsets_64.c | 21 --------------------- 3 files changed, 19 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..b27f6ec90caa 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -41,6 +41,25 @@ void common(void) { OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + BLANK(); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); + + BLANK(); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + + BLANK(); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); +#endif + #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 47703aed74cf..628bfd4c06bb 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -17,17 +17,6 @@ void foo(void); void foo(void) { - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); @@ -37,7 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); OFFSET(TI_cpu, thread_info, cpu); BLANK(); @@ -60,9 +48,6 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5ce6f2da8763..dcaab87da629 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -29,27 +29,6 @@ int main(void) BLANK(); #endif -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); -- cgit v1.2.3 From 
14434052ffb3b7fe8f491e9d0a7793376fb79155 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 12 Apr 2015 09:14:46 -0400 Subject: x86/asm: Remove unused TI_cpu Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/1428844486-6638-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 628bfd4c06bb..6ce39025f467 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -26,9 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); -- cgit v1.2.3 From ff22e2010144b6aa050da35851f1fa79087cca06 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 12 Apr 2015 21:45:06 +0200 Subject: x86/asm, x86/power/hibernate: Use local labels in asm ... so that they don't appear in the object file and thus in objdump output. They're local anyway and have a meaning only within that file. No functionality change. Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: H. Peter Anvin Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: linux-pm@vger.kernel.org Link: http://lkml.kernel.org/r/1428867906-12016-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/power/hibernate_asm_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 3c4469a7a929..e2386cb4e0c3 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -78,9 +78,9 @@ ENTRY(restore_image) /* code below has been relocated to a safe page */ ENTRY(core_restore_code) -loop: +.Lloop: testq %rdx, %rdx - jz done + jz .Ldone /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi @@ -91,8 +91,8 @@ loop: /* progress to the next pbe */ movq pbe_next(%rdx), %rdx - jmp loop -done: + jmp .Lloop +.Ldone: /* jump to the restore_registers address from the image header */ jmpq *%rax /* -- cgit v1.2.3 From c0f6feba784e1087b905ad097d2d9ac0aaf744a5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 Apr 2015 08:50:14 +0200 Subject: x86/asm, x86/acpi/wakeup_64.S: Make global label a local one Make it a local symbol so that it doesn't appear in objdump output. No functionality change - code remains the same, just the global label disappears: ffffffff81039dbe: bf 03 00 00 00 mov $0x3,%edi ffffffff81039dc3: 31 c0 xor %eax,%eax ffffffff81039dc5: e8 b6 fd ff ff callq ffffffff81039b80 -ffffffff81039dca: eb 00 jmp ffffffff81039dcc - -ffffffff81039dcc : +ffffffff81039dca: eb 00 jmp ffffffff81039dcc ffffffff81039dcc: 48 c7 c0 80 1a ca 82 mov $0xffffffff82ca1a80,%rax ffffffff81039dd3: 48 8b 98 e2 00 00 00 mov 0xe2(%rax),%rbx ffffffff81039dda: 0f 22 e3 mov %rbx,%cr4 Signed-off-by: Borislav Petkov Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Len Brown Cc: Linus Torvalds Cc: Pavel Machek Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429080614-22610-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/wakeup_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index ae693b51ed8e..8c35df468104 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel) pushfq popq pt_regs_flags(%rax) - movq $resume_point, saved_rip(%rip) + movq $.Lresume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel) xorl %eax, %eax call x86_acpi_enter_sleep_state /* in case something went wrong, restore the machine status and go on */ - jmp resume_point + jmp .Lresume_point .align 4 -resume_point: +.Lresume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx -- cgit v1.2.3 From 130005231c9f2090b1b177e2cca9841b562c1784 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 15 Apr 2015 10:24:54 +0800 Subject: kvm: mmu: don't do memslot overflow check As Andres pointed out: | I don't understand the value of this check here. Are we looking for a | broken memslot? Shouldn't this be a BUG_ON? Is this the place to care | about these things? npages is capped to KVM_MEM_MAX_NR_PAGES, i.e. | 2^31. A 64 bit overflow would be caused by a gigantic gfn_start which | would be trouble in many other ways. This patch drops the memslot overflow check to make the codes more simple. Reviewed-by: Andres Lagar-Cavilla Signed-off-by: Wanpeng Li Message-Id: <1429064694-3072-1-git-send-email-wanpeng.li@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 146f295ee322..07bb22157338 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4504,19 +4504,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, bool flush = false; unsigned long *rmapp; unsigned long last_index, index; - gfn_t gfn_start, gfn_end; spin_lock(&kvm->mmu_lock); - gfn_start = memslot->base_gfn; - gfn_end = memslot->base_gfn + memslot->npages - 1; - - if (gfn_start >= gfn_end) - goto out; - rmapp = memslot->arch.rmap[0]; - last_index = gfn_to_index(gfn_end, memslot->base_gfn, - PT_PAGE_TABLE_LEVEL); + last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1, + memslot->base_gfn, PT_PAGE_TABLE_LEVEL); for (index = 0; index <= last_index; ++index, ++rmapp) { if (*rmapp) @@ -4534,7 +4527,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, if (flush) kvm_flush_remote_tlbs(kvm); -out: spin_unlock(&kvm->mmu_lock); } -- cgit v1.2.3 From decf63336e356423300b935afbebeca1fcb15184 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 14 Apr 2015 12:04:10 +0800 Subject: KVM: MMU: fix comment in kvm_mmu_zap_collapsible_spte Soft mmu uses direct shadow page to fill guest large mapping with small pages if huge mapping is disallowed on host. So zapping direct shadow page works well both for soft mmu and hard mmu, it's just less widely applicable. Fix the comment to reflect this. Signed-off-by: Xiao Guangrong Message-Id: <552C91BA.1010703@linux.intel.com> [Fix comment wording further. 
- Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 07bb22157338..d43867c33bc4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, pfn = spte_to_pfn(*sptep); /* - * Only EPT supported for now; otherwise, one would need to - * find out efficiently whether the guest page tables are - * also using huge pages. + * We cannot do huge page mapping for indirect shadow pages, + * which are found on the last rmap (level = 1) when not using + * tdp; such shadow pages are synced with the page table in + * the guest, and the guest page table is using 4K page size + * mapping if the indirect sp has level = 1. */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && -- cgit v1.2.3 From bb668734c4c960c8f61f017585b323b97e5f47b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:26:21 +0000 Subject: VFS: assorted d_backing_inode() annotations Signed-off-by: David Howells Signed-off-by: Al Viro --- arch/x86/kvm/assigned-dev.c | 2 +- drivers/mtd/ubi/build.c | 6 +++--- drivers/mtd/ubi/kapi.c | 2 +- fs/block_dev.c | 2 +- fs/posix_acl.c | 8 ++++---- fs/stat.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 6eb5c20ee373..d090ecf08809 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev) if (r) return r; - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); path_put(&path); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index ba01a8d22d28..5f58635b8360 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1164,9 +1164,9 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) return ERR_PTR(err); /* MTD device number is defined by the major / minor numbers */ - major = imajor(path.dentry->d_inode); - minor = iminor(path.dentry->d_inode); - mode = path.dentry->d_inode->i_mode; + major = imajor(d_backing_inode(path.dentry)); + minor = iminor(d_backing_inode(path.dentry)); + mode = d_backing_inode(path.dentry)->i_mode; path_put(&path); if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) return ERR_PTR(-EINVAL); diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 478e00cf2d9e..e844887732fb 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) if (error) return ERR_PTR(error); - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); mod = inode->i_mode; ubi_num = ubi_major2num(imajor(inode)); vol_id = iminor(inode) - 1; diff --git a/fs/block_dev.c b/fs/block_dev.c index 897ee0503932..79b4fa3b391d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1716,7 +1716,7 @@ struct block_device *lookup_bdev(const char *pathname) if (error) return ERR_PTR(error); - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3a48bb789c9f..84bb65b83570 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -774,12 +774,12 @@ posix_acl_xattr_get(struct dentry *dentry, const char *name, struct 
posix_acl *acl; int error; - if (!IS_POSIXACL(dentry->d_inode)) + if (!IS_POSIXACL(d_backing_inode(dentry))) return -EOPNOTSUPP; if (d_is_symlink(dentry)) return -EOPNOTSUPP; - acl = get_acl(dentry->d_inode, type); + acl = get_acl(d_backing_inode(dentry), type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -795,7 +795,7 @@ static int posix_acl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags, int type) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_backing_inode(dentry); struct posix_acl *acl = NULL; int ret; @@ -834,7 +834,7 @@ posix_acl_xattr_list(struct dentry *dentry, char *list, size_t list_size, const char *xname; size_t size; - if (!IS_POSIXACL(dentry->d_inode)) + if (!IS_POSIXACL(d_backing_inode(dentry))) return -EOPNOTSUPP; if (d_is_symlink(dentry)) return -EOPNOTSUPP; diff --git a/fs/stat.c b/fs/stat.c index 19636af5e75c..cccc1aab9a8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(generic_fillattr); */ int vfs_getattr_nosec(struct path *path, struct kstat *stat) { - struct inode *inode = path->dentry->d_inode; + struct inode *inode = d_backing_inode(path->dentry); if (inode->i_op->getattr) return inode->i_op->getattr(path->mnt, path->dentry, stat); @@ -326,7 +326,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, retry: error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty); if (!error) { - struct inode *inode = path.dentry->d_inode; + struct inode *inode = d_backing_inode(path.dentry); error = empty ? -ENOENT : -EINVAL; if (inode->i_op->readlink) { -- cgit v1.2.3 From 6a907738ab9840ca3d71c22cd28fba4cbae7f7ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Apr 2015 10:51:26 +0100 Subject: x86/asm: Enable fast 32-bit put_user_64() for copy_to_user() For fixed sized copies, copy_to_user() will utilize __put_user_size() fastpaths. However, it is missing the translation for 64-bit copies on x86/32. Testing on a Pinetrail Atom, the 64 bit put_user() fastpath is substantially faster than the generic copy_to_user() fallback. Signed-off-by: Chris Wilson Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429091486-11443-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..0ed5504c6060 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; + case 8: + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + return ret; } } return __copy_to_user_ll(to, from, n); -- cgit v1.2.3 From 98b228f55014870092c15d7d168fecac69f2f12a Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Wed, 15 Apr 2015 16:32:24 -0700 Subject: x86/efi: Store upper bits of command line buffer address in ext_cmd_line_ptr Until now, the EFI stub was only setting the 32 bit cmd_line_ptr in the setup_header structure, so on 64 bit platforms this could be truncated. This patch adds setting the upper bits of the buffer address in ext_cmd_line_ptr. This case was likely never hit, as the allocation for this buffer is done at the lowest available address. 
Only x86_64 kernels have this problem, as the 1-1 mapping mandated by EFI ensures that all memory is 32 bit addressable on 32 bit platforms. The EFI stub does not support mixed mode, so the 32 bit kernel on 64 bit firmware case does not need to be handled. Signed-off-by: Roy Franz Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 92b9a5f2aed6..5999980206bf 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1110,6 +1110,8 @@ struct boot_params *make_boot_params(struct efi_config *c) if (!cmdline_ptr) goto fail; hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; + /* Fill in upper bits of command line address, NOP on 32 bit */ + boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; hdr->ramdisk_image = 0; hdr->ramdisk_size = 0; -- cgit v1.2.3 From 94d4b4765b7ddb8478b0d57663cf7a08e2263bbf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 23 Nov 2012 19:19:07 +0100 Subject: x86/mm: Clean up types in xlate_dev_mem_ptr() Pavel Machek reported the following compiler warning on x86/32 CONFIG_HIGHMEM64G=y builds: arch/x86/mm/ioremap.c:344:10: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Clean up the types in this function by using a single natural type for internal calculations (unsigned long), to make it more apparent what's happening, and also to remove fragile casts. Reported-by: Pavel Machek Cc: jgross@suse.com Cc: roland@purestorage.com Link: http://lkml.kernel.org/r/20150416080440.GA507@amd Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..4bf037b20f47 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -332,18 +332,20 @@ EXPORT_SYMBOL(iounmap); */ void *xlate_dev_mem_ptr(phys_addr_t phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; + unsigned long start = phys & PAGE_MASK; + unsigned long offset = phys & ~PAGE_MASK; + unsigned long vaddr; /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE); + /* Only add the offset on success and return NULL if the ioremap() failed: */ + if (vaddr) + vaddr += offset; - return addr; + return (void *)vaddr; } void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) -- cgit v1.2.3 From 085e68eeafbf76e21848ad5bafaecec88a11dd64 Mon Sep 17 00:00:00 2001 From: Ben Serebrin Date: Thu, 16 Apr 2015 11:58:05 -0700 Subject: KVM: VMX: Preserve host CR4.MCE value while in guest mode. The host's decision to enable machine check exceptions should remain in force during non-root mode. KVM was writing 0 to cr4 on VCPU reset and passed a slightly-modified 0 to the vmcs.guest_cr4 value. Tested: Built. On earlier version, tested by injecting machine check while a guest is spinning. Before the change, if guest CR4.MCE==0, then the machine check is escalated to Catastrophic Error (CATERR) and the machine dies. If guest CR4.MCE==1, then the machine check causes VMEXIT and is handled normally by host Linux. 
After the change, injecting a machine check causes normal Linux machine check handling. Signed-off-by: Ben Serebrin Reviewed-by: Venkatesh Srinivas Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f5e8dce8046c..f7b61687bd79 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + /* + * Pass through host's Machine Check Enable value to hw_cr4, which + * is in force while we are in guest mode. Do not let guests control + * this bit, even if host CR4.MCE == 0. + */ + unsigned long hw_cr4 = + (cr4_read_shadow() & X86_CR4_MCE) | + (cr4 & ~X86_CR4_MCE) | + (to_vmx(vcpu)->rmode.vm86_active ? + KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); if (cr4 & X86_CR4_VMXE) { /* -- cgit v1.2.3 From aac82d319148c6a84e1bf90b86d3e0ec8bf0ee38 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Apr 2015 15:51:54 -0700 Subject: x86, paravirt, xen: Remove the 64-bit ->irq_enable_sysexit() pvop We don't use irq_enable_sysexit on 64-bit kernels any more. Remove all the paravirt and Xen machinery to support it on 64-bit kernels. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8a03355698fe5b94194e9e7360f19f91c1b2cf1f.1428100853.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 6 ------ arch/x86/include/asm/paravirt_types.h | 7 ++++--- arch/x86/kernel/asm-offsets.c | 2 ++ arch/x86/kernel/paravirt.c | 4 +++- arch/x86/kernel/paravirt_patch_64.c | 1 - arch/x86/xen/enlighten.c | 3 ++- arch/x86/xen/xen-asm_64.S | 16 ---------------- arch/x86/xen/xen-ops.h | 2 ++ 8 files changed, 13 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..3cdb9eafbf8c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32) swapgs sysretl ENDPROC(native_usergs_sysret32) - -ENTRY(native_irq_enable_sysexit) - swapgs - sti - sysexit -ENDPROC(native_irq_enable_sysexit) #endif /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..38a0ff9ef06e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -160,13 +160,14 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); +#ifdef CONFIG_X86_32 /* * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) + * is only used in 32-bit kernels. 64-bit kernels use + * usergs_sysret32 instead. 
*/ void (*irq_enable_sysexit)(void); +#endif /* * Switch to usermode gs and return to 64-bit usermode using diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index b27f6ec90caa..8e3d22a1af94 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -68,7 +68,9 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); +#ifdef CONFIG_X86_32 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); +#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..7563114d9c3a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || +#ifdef CONFIG_X86_32 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || +#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_32) .irq_enable_sysexit = native_irq_enable_sysexit, #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..0de21c62c348 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 81665c9f2132..3797b6b31f95 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1267,10 +1267,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tscp = native_read_tscp, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 985fc3ee0973..a2cabb8bd6bf 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -47,22 +47,6 @@ ENTRY(xen_iret) ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. 
- */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - ENTRY(xen_sysret64) /* * We're already on the usermode stack at this point, but diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..c20fe29e65f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); +#ifdef CONFIG_X86_32 __visible void xen_sysexit(void); +#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); -- cgit v1.2.3 From 3462bd2adeadc49d9e126bca3b5536a3437a902d Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Mon, 20 Apr 2015 23:27:11 +0200 Subject: x86/asm: Always inline atomics During some code analysis I realized that atomic_add(), atomic_sub() and friends are not necessarily inlined AND that each function is defined multiple times: atomic_inc: 544 duplicates atomic_dec: 215 duplicates atomic_dec_and_test: 107 duplicates atomic64_inc: 38 duplicates [...] Each definition is exactly identical, e.g.: ffffffff813171b8 : 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 01 3e lock add %edi,(%rsi) 5d pop %rbp c3 retq In turn, each definition has one or more callsites: ffffffff81317c78: e8 3b f5 ff ff callq ffffffff813171b8 [...] ffffffff8131a062: e8 51 d1 ff ff callq ffffffff813171b8 [...] ffffffff8131a190: e8 23 d0 ff ff callq ffffffff813171b8 [...] The other way around would be to remove the static linkage - but I prefer enforced inlining here. Before: text data bss dec hex filename 81467393 19874720 20168704 121510817 73e1ba1 vmlinux.orig After: text data bss dec hex filename 81461323 19874720 20168704 121504747 73e03eb vmlinux.inlined Yes, the inlining here makes the kernel even smaller! ;) Linus further observed: "I have this memory of having seen that before - the size heuristics for gcc getting confused by inlining. [...] It might be a good idea to mark things that are basically just wrappers around a single (or a couple of) asm instruction to be always_inline." Signed-off-by: Hagen Paul Pfeifer Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429565231-4609-1-git-send-email-hagen@jauu.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 16 ++++++++-------- arch/x86/include/asm/atomic64_64.h | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e5cd123fdfb..75a9ee8529f3 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i) * * Atomically adds @i to @v. */ -static inline void atomic_add(int i, atomic_t *v) +static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v) * * Atomically subtracts @i from @v.
*/ -static inline void atomic_sub(int i, atomic_t *v) +static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -static inline void atomic_inc(atomic_t *v) +static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); @@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic_dec(atomic_t *v) +static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); @@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic_dec_and_test(atomic_t *v) +static __always_inline int atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } @@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index f8d273e18516..b965f9e03f2a 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. 
*/ -static inline void atomic64_dec(atomic64_t *v) +static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -- cgit v1.2.3 From 3b6e042188994466ec257b71296b5f85b894dcd9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Apr 2015 17:26:23 +0200 Subject: perf/x86/intel: Add cpu_(prepare|starting|dying) for core_pmu The core_pmu does not define the cpu_* callbacks, which handle allocation of 'struct cpu_hw_events::shared_regs' data, initialization of the debug store and the PMU_FL_EXCL_CNTRS counters. While this probably won't happen on bare metal, a virtual CPU can define x86_pmu.extra_regs together with PMU version 1 and thus end up using core_pmu, and its shared_regs data, without that data ever being allocated. That could lead to the following panic: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] _spin_lock_irqsave+0x1f/0x40 SNIP [] __intel_shared_reg_get_constraints+0x69/0x1e0 [] intel_get_event_constraints+0x9b/0x180 [] x86_schedule_events+0x75/0x1d0 [] ? check_preempt_curr+0x7c/0x90 [] ? try_to_wake_up+0x24e/0x3e0 [] ? default_wake_function+0x12/0x20 [] ? autoremove_wake_function+0x16/0x40 [] ? __wake_up_common+0x59/0x90 [] ? __d_lookup+0xa7/0x150 [] ? do_lookup+0x9f/0x230 [] ? dput+0x9a/0x150 [] ? path_to_nameidata+0x25/0x60 [] ? __link_path_walk+0x7da/0x1000 [] ? x86_pmu_add+0xb9/0x170 [] x86_pmu_commit_txn+0x67/0xc0 [] ? mntput_no_expire+0x30/0x110 [] ? path_put+0x31/0x40 [] ? current_fs_time+0x27/0x30 [] ? mem_cgroup_get_reclaim_stat_from_page+0x20/0x70 [] group_sched_in+0x13a/0x170 [] ? sched_clock+0x9/0x10 [] ctx_sched_in+0x2e8/0x330 [] perf_event_sched_in+0x6b/0xb0 [] perf_event_context_sched_in+0x76/0xc0 [] perf_event_comm+0x1bb/0x2e0 [] set_task_comm+0x69/0x80 [] setup_new_exec+0xe1/0x2e0 [] load_elf_binary+0x3ce/0x1ab0 Add the cpu_(prepare|starting|dying) callbacks to core_pmu so that its shared_regs data gets allocated. AFAICS there's no harm in initializing the debug store and PMU_FL_EXCL_CNTRS for core_pmu either.
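The underlying contract is that any per-CPU state the event-scheduling fast path dereferences must be set up by the PMU's CPU lifecycle callbacks. A hedged standalone sketch of that shape, with illustrative names rather than the exact kernel API:

#include <stdio.h>
#include <stdlib.h>

struct shared_regs { int core_id; };	/* stand-in for per-CPU state */

struct pmu_callbacks {
	struct shared_regs *(*cpu_prepare)(int cpu);
};

static struct shared_regs *core_cpu_prepare(int cpu)
{
	struct shared_regs *regs = calloc(1, sizeof(*regs));

	if (regs)
		regs->core_id = cpu;
	return regs;
}

static void schedule_event(struct shared_regs *regs)
{
	/* Like __intel_shared_reg_get_constraints(), this dereferences
	 * the per-CPU state unconditionally. */
	printf("constraints for core %d\n", regs->core_id);
}

int main(void)
{
	struct pmu_callbacks core_pmu = { .cpu_prepare = core_cpu_prepare };

	/* With no cpu_prepare wired up, regs would stay NULL and
	 * schedule_event() would oops exactly as in the trace above. */
	struct shared_regs *regs = core_pmu.cpu_prepare(0);

	if (!regs)
		return 1;
	schedule_event(regs);
	free(regs);
	return 0;
}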
Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20150421152623.GC13169@krava.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 66 +++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 219d3fb423a1..960e85de13fb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -static __initconst const struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, - .format_attrs = intel_arch_formats_attr, - .events_sysfs_show = intel_event_sysfs_show, -}; - struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; @@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static __initconst const struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32-bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL<<31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + /* + * Virtual (or funny metal) CPU can define x86_pmu.extra_regs + * together with PMU version 1 and thus be using core_pmu with + * shared_regs. We need following callbacks here to allocate + * it properly. 
+ */ + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, -- cgit v1.2.3 From 80bcffb376a6890dd7452b12c1ba032f8f24fef6 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Mon, 20 Apr 2015 15:34:07 -0700 Subject: perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao Cc: Arnaldo Carvalho de Melo Cc: Bjorn Helgaas Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015b755c..ca75e70865ef 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- cgit v1.2.3 From 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Tue, 21 Apr 2015 12:33:11 -0700 Subject: perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. 
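Both uncore patches above revolve around table-driven PCI matching: the driver declares the device IDs it owns and the probe path walks the table. A hypothetical, self-contained C sketch of that lookup, reusing the HSW ULT ID from the patch (the struct layout and helper names are simplified stand-ins for 'struct pci_device_id' and the PCI core's matching, not the kernel's actual code):

#include <stdio.h>

#define PCI_VENDOR_ID_INTEL		0x8086
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04	/* from the patch above */

/* Simplified stand-in for 'struct pci_device_id'. */
struct pci_id {
	unsigned short vendor, device;
};

static const struct pci_id imc_ids[] = {
	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC },
	{ 0, 0 },	/* end: all zeroes, as in hsw_uncore_pci_ids[] */
};

static int imc_match(unsigned short vendor, unsigned short device)
{
	const struct pci_id *id;

	for (id = imc_ids; id->vendor; id++)
		if (id->vendor == vendor && id->device == device)
			return 1;
	return 0;
}

int main(void)
{
	printf("0a04 matches: %d\n", imc_match(0x8086, 0x0a04));	/* 1 */
	printf("1234 matches: %d\n", imc_match(0x8086, 0x1234));	/* 0 */
	return 0;
}

Keeping the #defines next to the table that uses them, which is what this patch does, means a new IMC variant touches one file only.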
Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +++++- include/linux/pci_ids.h | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70865ef..4562e9e22c60 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a93f6b..a59385852233 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- cgit v1.2.3 From 17be0aec74fb036eb4eb32c2268f3420a034762b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:27:29 +0200 Subject: x86/asm/entry/64: Implement better check for canonical addresses This change makes the check exact (no more false positives on "negative" addresses). Andy explains: "Canonical addresses either start with 17 zeros or 17 ones. In the old code, we checked that the top (64-47) = 17 bits were all zero. We did this by shifting right by 47 bits and making sure that nothing was left. In the new code, we're shifting left by (64 - 48) = 16 bits and then signed shifting right by the same amount, thus propagating the 17th highest bit to all positions to its left. If we get the same value we started with, then we're good to go." While it isn't really important to be fully correct here (almost all addresses we'll ever see will be userspace ones), the new check looks to be cheap enough: it uses two more ALU ops but preserves %rcx, allowing us not to reload it from pt_regs->cx again. At the disassembly level, the changes are: cmp %rcx,0x80(%rsp) -> mov 0x80(%rsp),%r11; cmp %rcx,%r11 shr $0x2f,%rcx -> shl $0x10,%rcx; sar $0x10,%rcx; cmp %rcx,%r11 mov 0x58(%rsp),%rcx -> (eliminated) Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429633649-20169-1-git-send-email-dvlasenk@redhat.com [ Changelog massage.
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c7b238494b31..3c78a15a537d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -410,26 +410,27 @@ syscall_return: * a completely clean 64-bit userspace context. */ movq RCX(%rsp),%rcx - cmpq %rcx,RIP(%rsp) /* RCX == RIP */ + movq RIP(%rsp),%r11 + cmpq %rcx,%r11 /* RCX == RIP */ jne opportunistic_sysret_failed /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. It's not worth - * testing for canonicalness exactly -- this check detects any - * of the 17 high bits set, which is true for non-canonical - * or kernel addresses. (This will pessimize vsyscall=native. - * Big deal.) + * the kernel, since userspace controls RSP. * - * If virtual addresses ever become wider, this will need + * If width of "canonical tail" ever becomes variable, this will need * to be updated to remain correct on both old and new CPUs. */ .ifne __VIRTUAL_MASK_SHIFT - 47 .error "virtual address width changed -- SYSRET checks need update" .endif - shr $__VIRTUAL_MASK_SHIFT, %rcx - jnz opportunistic_sysret_failed + /* Change top 16 bits to be the sign-extension of 47th bit */ + shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 + jne opportunistic_sysret_failed cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ jne opportunistic_sysret_failed @@ -466,8 +467,8 @@ syscall_return: */ syscall_return_via_sysret: CFI_REMEMBER_STATE - /* r11 is already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_R11 + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 CFI_RESTORE_STATE -- cgit v1.2.3 From ac7f5dfb0348a33b2ea92a0c477103c4db45ad4e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:03:13 +0200 Subject: x86/asm/entry/64: Merge 32-bit execve stubs with x32 ones, as they are identical Run-tested. Suggested-by: Brian Gerst Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429632194-13445-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3c78a15a537d..e952f6bf1d6d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -525,40 +525,27 @@ GLOBAL(stub_execveat) CFI_ENDPROC END(stub_execveat) -#ifdef CONFIG_X86_X32_ABI +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) +GLOBAL(stub32_execve) CFI_STARTPROC DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve CFI_ENDPROC +END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 - call compat_sys_execveat - jmp return_from_execve - CFI_ENDPROC -END(stub_x32_execveat) -#endif - -#ifdef CONFIG_IA32_EMULATION - .align 8 -GLOBAL(stub32_execve) - CFI_STARTPROC - call compat_sys_execve - jmp return_from_execve - CFI_ENDPROC -END(stub32_execve) - .align 8 GLOBAL(stub32_execveat) CFI_STARTPROC + DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve CFI_ENDPROC END(stub32_execveat) +END(stub_x32_execveat) #endif /* -- cgit v1.2.3 From 3f5159a9221f19b08275b0a6388ab14392ae4eec Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:03:14 +0200 Subject: x86/asm/entry/32: Update -ENOSYS handling to match the 64-bit logic Recently Andy changed the 64-bit syscall logic so that pt_regs->ax is initially set to -ENOSYS, and on syscall exit, it is updated with the actual return value. This simplified the logic there. This patch does the same for 32-bit syscall entry points. The check for %rax being too big is moved to be just before the call instruction which dispatches execution through the syscall table. There is no way to accidentally skip this check now by jumping to a label after it. This allows us to remove redundant checks after ptrace et al. If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax) instruction pair. pt_regs->ax remains set to -ENOSYS, and it gets returned to userspace. Similar to 64-bit code, this eliminates the "ia32_badsys" code path. Run-tested. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Will Drewry Link: http://lkml.kernel.org/r/1429632194-13445-2-git-send-email-dvlasenk@redhat.com [ Changelog massage. 
] Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 3cdb9eafbf8c..56fd6dd2e342 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -136,7 +136,7 @@ ENTRY(ia32_sysenter_target) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -163,8 +163,6 @@ sysenter_flags_fixed: testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -173,8 +171,11 @@ sysenter_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ sysenter_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -241,9 +242,7 @@ sysexit_from_sys_call: movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %eax,%edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl RAX(%rsp),%eax /* reload syscall number */ - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + movl ORIG_RAX(%rsp),%eax /* reload syscall number */ movl %ebx,%edi /* reload 1st syscall arg */ movl RCX(%rsp),%esi /* reload 2nd syscall arg */ movl RDX(%rsp),%edx /* reload 3rd syscall arg */ @@ -294,13 +293,10 @@ sysenter_tracesys: #endif SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -370,7 +366,7 @@ ENTRY(ia32_cstar_target) pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -386,8 +382,6 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpq $IA32_NR_syscalls-1,%rax - ja ia32_badsys cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -396,8 +390,11 @@ cstar_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ cstar_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -444,14 +441,11 @@ cstar_tracesys: xchgl %r9d,%ebp SAVE_EXTRA_REGS CLEAR_RREGS r9 - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS xchgl %ebp,%r9d - cmpq 
$(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -510,7 +504,7 @@ ENTRY(ia32_syscall) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -518,8 +512,6 @@ ENTRY(ia32_syscall) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys ia32_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -527,9 +519,12 @@ ia32_do_call: xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX(%rsp) +1: ia32_ret_from_sys_call: CLEAR_RREGS jmp int_ret_from_sys_call @@ -537,23 +532,14 @@ ia32_ret_from_sys_call: ia32_tracesys: SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call + CFI_ENDPROC END(ia32_syscall) -ia32_badsys: - movq $0,ORIG_RAX(%rsp) - movq $-ENOSYS,%rax - jmp ia32_sysret - - CFI_ENDPROC - .macro PTREGSCALL label, func ALIGN GLOBAL(\label) -- cgit v1.2.3 From 00425bb181c204c8f250fec122e2817a930e0286 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 24 Apr 2015 08:37:09 +0200 Subject: crypto: x86/sha512_ssse3 - fixup for asm function prototype change Patch e68410ebf626 ("crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer") changed the prototypes of the core asm SHA-512 implementations so that they are compatible with the prototype used by the base layer. However, in one instance, the register that was used for passing the input buffer was reused as a scratch register later on in the code, and since the input buffer param changed places with the digest param (which needs to be written back before the function returns), this resulted in the scratch register being dereferenced in a memory write operation, causing a GPF. Fix this by changing the scratch register to use the same register as the input buffer param again.
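The faulting pattern is easier to see in C than in asm: the output pointer was still live (the digest write-back needs it), but its register was reused as scratch. A hypothetical analogy, not the actual SHA-512 code; only transform_fixed() is safe to run, and note the real fix reuses the input-buffer register (%rsi), which is no longer live at that point - here a separate variable plays that role:

#include <stdint.h>
#include <stdio.h>

/* Buggy shape (shown, never called): 'digest' is reused as scratch,
 * so the final write-back dereferences a junk pointer - the GPF from
 * the report. */
static void transform_buggy(uint64_t *digest, const uint64_t *in)
{
	uint64_t state = digest[0] ^ in[0];

	digest = (uint64_t *)(uintptr_t)(state & 0xff);	/* clobbered */
	digest[0] = state;				/* faults */
}

/* Fixed shape: scratch no longer aliases the still-live output pointer. */
static void transform_fixed(uint64_t *digest, const uint64_t *in)
{
	uint64_t state = digest[0] ^ in[0];
	uint64_t scratch = state & 0xff;	/* separate scratch */

	digest[0] = state + scratch;		/* write-back stays valid */
}

int main(void)
{
	uint64_t digest[1] = { 0x123 }, in[1] = { 0x456 };

	(void)transform_buggy;			/* intentionally not executed */
	transform_fixed(digest, in);
	printf("digest[0] = 0x%llx\n", (unsigned long long)digest[0]);
	return 0;
}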
Fixes: e68410ebf626 ("crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer") Reported-by: Bobby Powers Tested-by: Bobby Powers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-avx2-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index a4771dcd1fcf..1f20b35d8573 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -79,7 +79,7 @@ NUM_BLKS = %rdx c = %rcx d = %r8 e = %rdx -y3 = %rdi +y3 = %rsi TBL = %rbp -- cgit v1.2.3 From d869844bd081081bf537e806a44811884230643e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Apr 2015 08:33:59 -0700 Subject: x86: fix special __probe_kernel_write() tail zeroing case Commit cae2a173fe94 ("x86: clean up/fix 'copy_in_user()' tail zeroing") fixed the failure case tail zeroing of one special case of the x86-64 generic user-copy routine, namely when used for the user-to-user case ("copy_in_user()"). But in the process it broke an even more unusual case: using the user copy routine for kernel-to-kernel copying. Now, normally kernel-kernel copies are obviously done using memcpy(), but we have a couple of special cases when we use the user-copy functions. One is when we pass a kernel buffer to a regular user-buffer routine, using set_fs(KERNEL_DS). That's a "normal" case, and continued to work fine, because it never takes any faults (with the possible exception of a silent and successful vmalloc fault). But Jan Beulich pointed out another, very unusual, special case: when we use the user-copy routines not because it's a path that expects a user pointer, but for a couple of ftrace/kgdb cases that want to do a kernel copy, but do so using "unsafe" buffers, and use the user-copy routine to gracefully handle faults. IOW, for probe_kernel_write(). And that broke for the case of a faulting kernel destination, because we saw the kernel destination and wanted to try to clear the tail of the buffer. Which doesn't work, since that's what faults. This only triggers for things like kgdb and ftrace users (e.g. trying to set a breakpoint on read-only memory), but it's definitely a bug. The fix is to not compare against the kernel address start (TASK_SIZE), but instead use the same limits "access_ok()" uses. Reported-and-tested-by: Jan Beulich Cc: stable@vger.kernel.org # 4.0 Signed-off-by: Linus Torvalds --- arch/x86/lib/usercopy_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 1f33b3d1fd68..0a42327a59d7 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len) clac(); /* If the destination is a kernel buffer, we always clear the end */ - if ((unsigned long)to >= TASK_SIZE_MAX) + if (!__addr_ok(to)) memset(to, 0, len); return len; } -- cgit v1.2.3 From 61f01dd941ba9e06d2bf05994450ecc3d61b6b8b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 26 Apr 2015 16:47:59 -0700 Subject: x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue AMD CPUs don't reinitialize the SS descriptor on SYSRET, so SYSRET with SS == 0 results in an invalid usermode state in which SS is apparently equal to __USER_DS but causes #SS if used. Work around the issue by setting SS to __KERNEL_DS in __switch_to, thus ensuring that SYSRET never happens with SS set to NULL.
This was exposed by a recent vDSO cleanup. Fixes: e7d6eefaaa44 ("x86/vdso32/syscall.S: Do not load __USER32_DS to %ss") Signed-off-by: Andy Lutomirski Cc: Peter Anvin Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Brian Gerst Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32entry.S | 7 +++++++ arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/cpu/amd.c | 3 +++ arch/x86/kernel/entry_64.S | 9 +++++++++ arch/x86/kernel/process_64.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..72bf2680f819 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -427,6 +427,13 @@ sysretl_from_sys_call: * cs and ss are loaded from MSRs. * (Note: 32bit->32bit SYSRET is different: since r11 * does not exist, it merely sets eflags.IF=1). + * + * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss + * descriptor is not reinitialized. This means that we must + * avoid SYSRET with SS == NULL, which could happen if we schedule, + * exit the kernel, and re-enter using an interrupt vector. (All + * interrupt entries on x86_64 set SS to NULL.) We prevent that + * from happening by reloading SS in __switch_to. */ USERGS_SYSRET32 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ee9b94d9921..3d6606fb97d0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -265,6 +265,7 @@ #define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fd470ebf924e..e4cf63301ff4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + + /* AMD CPUs don't reset SS attributes on SYSRET */ + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c7b238494b31..02c2eff7478d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,15 @@ system_call_fastpath: * rflags from r11 (but RF and VM bits are forced to 0), * cs and ss are loaded from MSRs. * Restoration of rflags re-enables interrupts. + * + * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss + * descriptor is not reinitialized. This means that we should + * avoid SYSRET with SS == NULL, which could happen if we schedule, + * exit the kernel, and re-enter using an interrupt vector. (All + * interrupt entries on x86_64 set SS to NULL.) We prevent that + * from happening by reloading SS in __switch_to. (Actually + * detecting the failure in 64-bit userspace is tricky but can be + * done.)
*/ USERGS_SYSRET64 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4baaa972f52a..ddfdbf74f174 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); + if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { + /* + * AMD CPUs have a misfeature: SYSRET sets the SS selector but + * does not update the cached descriptor. As a result, if we + * do SYSRET while SS is NULL, we'll end up in user mode with + * SS apparently equal to __USER_DS but actually unusable. + * + * The straightforward workaround would be to fix it up just + * before SYSRET, but that would slow down the system call + * fast paths. Instead, we ensure that SS is never NULL in + * system call context. We do this by replacing NULL SS + * selectors at every context switch. SYSCALL sets up a valid + * SS, so the only way to get NULL is to re-enter the kernel + * from CPL 3 through an interrupt. Since that can't happen + * in the same task as a running syscall, we are guaranteed to + * context switch between every interrupt vector entry and a + * subsequent SYSRET. + * + * We read SS first because SS reads are much faster than + * writes. Out of caution, we force SS to __KERNEL_DS even if + * it previously had a different non-NULL value. + */ + unsigned short ss_sel; + savesegment(ss, ss_sel); + if (ss_sel != __KERNEL_DS) + loadsegment(ss, __KERNEL_DS); + } + return prev_p; } -- cgit v1.2.3 From 5dca0d9147458be9b9363b8a484aa77d710b412a Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Wed, 25 Mar 2015 12:08:14 +0100 Subject: kvm: x86: fix kvmclock update protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kvmclock spec says that the host will increment a version field to an odd number, then update stuff, then increment it to an even number. The host is buggy and doesn't do this, and the result is observable when one vcpu reads another vcpu's kvmclock data. There's no good way for a guest kernel to keep its vdso from reading a different vcpu's kvmclock data, but we don't need to care about changing VCPUs as long as we read consistent data from kvmclock. (The VCPU can change outside of this loop too, so it doesn't matter if we return a value not fit for this VCPU.) Based on a patch by Radim Krčmář. Reviewed-by: Radim Krčmář Acked-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed31c31b2485..c73efcd03e29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) &guest_hv_clock, sizeof(guest_hv_clock)))) return 0; - /* - * The interface expects us to write an even number signaling that the - * update is finished. Since the guest won't see the intermediate - * state, we just increase by 2 at the end. + /* This VCPU is paused, but it's legal for a guest to read another + * VCPU's kvmclock, so we really have to follow the specification where + * it says that version is odd if data is being modified, and even after + * it is consistent. + * + * Version field updates must be kept separate.
This is because + * kvm_write_guest_cached might use a "rep movs" instruction, and + * writes within a string instruction are weakly ordered. So there + * are three writes overall. + * + * As a small optimization, only write the version field in the first + * and third write. The vcpu->pv_time cache is still valid, because the + * version field is the first in the struct. */ - vcpu->hv_clock.version = guest_hv_clock.version + 2; + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + + vcpu->hv_clock.version = guest_hv_clock.version + 1; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); + + smp_wmb(); /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); + + smp_wmb(); + + vcpu->hv_clock.version++; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); return 0; } -- cgit v1.2.3 From 73459e2a1ada09a68c02cc5b73f3116fc8194b3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Apr 2015 13:20:18 +0200 Subject: x86: pvclock: Really remove the sched notifier for cross-cpu migrations This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition can only arise due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC.
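Both kvmclock patches hinge on the same guest-side discipline: treat the version field as a seqcount and retry the read if the version is odd (update in progress) or changed across the read (torn snapshot). A simplified, self-contained C sketch of that reader - field names follow pvclock, but this is an illustrative assumption, not the kernel's __pvclock_read_cycles():

#include <stdint.h>
#include <stdio.h>

/* Simplified pvclock-style record: 'version' is odd while the host is
 * mid-update, even once the payload is consistent. */
struct time_info {
	volatile uint32_t version;
	volatile uint64_t system_time;
};

static uint64_t read_clock(const struct time_info *ti)
{
	uint32_t v;
	uint64_t t;

	do {
		v = ti->version;
		__sync_synchronize();	/* pairs with the writer's barriers */
		t = ti->system_time;	/* snapshot the payload */
		__sync_synchronize();
	} while ((v & 1) || v != ti->version);	/* odd or changed: retry */

	return t;
}

int main(void)
{
	struct time_info ti = { .version = 2, .system_time = 123456789ULL };

	printf("time: %llu\n", (unsigned long long)read_clock(&ti));
	return 0;
}

The first patch fixes the writer's half of this contract (odd before the update, even after, with barriers between the three writes); the revert above is possible because, with synchronized TSC, this reader-side check is enough on its own.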
Reported-by: Peter Zijlstra Suggested-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 1 - arch/x86/kernel/pvclock.c | 44 ------------------------------------------ arch/x86/vdso/vclock_gettime.c | 34 ++++++++++++++------------------ include/linux/sched.h | 8 -------- kernel/sched/core.c | 15 -------------- 5 files changed, 15 insertions(+), 87 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc07d496..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 40d2473836c9..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. 
+ * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - /* Make sure migrate_count will change if we leave the VCPU. */ - do { - pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - - cpu1 = cpu; - cpu = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1)); + pvti = get_pvti(cpu); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * - We must read TSC of pvti's VCPU. - * - KVM doesn't follow the versioning protocol, so data could - * change before version if we left the VCPU. + * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. */ - smp_rmb(); - } while (unlikely((pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + cpu1 = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..26a2e6122734 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); -/* Notifier for when a task gets migrated to a new CPU */ -struct task_migration_notifier { - struct task_struct *task; - int from_cpu; - int to_cpu; -}; -extern void register_task_migration_notifier(struct notifier_block *n); - extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9123a82cbb6..fe22f7510bce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } -static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); - -void register_task_migration_notifier(struct notifier_block *n) -{ - atomic_notifier_chain_register(&task_migration_notifier, n); -} - #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { - struct task_migration_notifier tmn; - if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); - - tmn.task = p; - tmn.from_cpu = task_cpu(p); - tmn.to_cpu = new_cpu; - - atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); -- cgit v1.2.3 From 2b953a5e994ce279904ec70220f7d4f31d380a0a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Apr 2015 18:46:20 -0400 Subject: xen: Suspend ticks on all CPUs during suspend Commit 77e32c89a711 ("clockevents: Manage device's state separately for the core") decouples clockevent device's modes from states. With this change when a Xen guest tries to resume, it won't be calling its set_mode op which needs to be done on each VCPU in order to make the hypervisor aware that we are in oneshot mode. 
This happens because clockevents_tick_resume() (which is an intermediate step of resuming ticks on a processor) doesn't call clockevents_set_state() anymore and because during suspend clockevent devices on all VCPUs (except for the one doing the suspend) are left in ONESHOT state. As a result, during resume the clockevents state machine will assume that the device is already where it should be and doesn't need to be updated. To avoid this problem we should suspend ticks on all VCPUs during suspend. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/suspend.c | 10 ++++++++++ drivers/xen/manage.c | 9 ++++++--- include/xen/xen-ops.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9497698645a..53b4c0811f4f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data) tick_resume_local(); } +static void xen_vcpu_notify_suspend(void *data) +{ + tick_suspend_local(); +} + void xen_arch_resume(void) { on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } + +void xen_arch_suspend(void) +{ + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); +} diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index bf1940706422..9e6a85104a20 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -131,6 +131,8 @@ static void do_suspend(void) goto out_resume; } + xen_arch_suspend(); + si.cancelled = 1; err = stop_machine(xen_suspend, &si, cpumask_of(0)); @@ -148,11 +150,12 @@ static void do_suspend(void) si.cancelled = 1; } + xen_arch_resume(); + out_resume: - if (!si.cancelled) { - xen_arch_resume(); + if (!si.cancelled) xs_resume(); - } else + else xs_suspend_cancel(); dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c643e6a94c9a..0ce4f32017ea 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -13,6 +13,7 @@ void xen_arch_post_suspend(int suspend_cancelled); void xen_timer_resume(void); void xen_arch_resume(void); +void xen_arch_suspend(void); void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb); -- cgit v1.2.3 From 2c62e8492ed7358bbe7da51666c7e0f6da9474ee Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 30 Apr 2015 12:41:28 +0800 Subject: x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff] available on PCI bus An IO port or MMIO resource assigned to a PCI host bridge may be consumed by the host bridge itself or available to its child bus/devices. The ACPI specification defines a bit (Producer/Consumer) to tell whether the resource is consumed by the host bridge itself, but firmware hasn't used that bit consistently, so we can't rely on it. Before commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation"), arch/x86/pci/acpi.c ignored all IO port resources defined by acpi_resource_io and acpi_resource_fixed_io to filter out IO ports consumed by the host bridge itself. Commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation") started accepting all IO port and MMIO resources, which caused a regression: IO port resources consumed by the host bridge itself became available to its child devices.
Then commit 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by host bridge itself") ignored resources consumed by the host bridge itself by checking the IORESOURCE_WINDOW flag, which accidentally removed MMIO resources defined by acpi_resource_memory24, acpi_resource_memory32 and acpi_resource_fixed_memory32. On x86 and IA64 platforms, all IO port and MMIO resources are assumed to be available to child bus/devices except one special case: IO port [0xCF8-0xCFF] is consumed by the host bridge itself to access PCI configuration space. So explicitly filter out PCI CFG IO ports [0xCF8-0xCFF]. This solution will also ease the way to consolidate ACPI PCI host bridge common code from x86, ia64 and ARM64. Related ACPI tables are archived at: https://bugzilla.kernel.org/show_bug.cgi?id=94221 Related discussions at: http://patchwork.ozlabs.org/patch/461633/ https://lkml.org/lkml/2015/3/29/304 Fixes: 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by host bridge itself") Reported-by: Bernhard Thaler Signed-off-by: Jiang Liu Cc: 4.0+ # 4.0+ Reviewed-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- arch/x86/pci/acpi.c | 24 ++++++++++++++++++++++-- drivers/acpi/resource.c | 2 +- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index e4695985f9de..d93963340c3c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) kfree(info); } +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + static void probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, int busnum, int domain, @@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info, "no IO and memory resources present in _CRS\n") else resource_list_for_each_entry_safe(entry, tmp, list) { - if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || - (entry->res->flags & IORESOURCE_DISABLED)) + if ((entry->res->flags & IORESOURCE_DISABLED) || + resource_is_pcicfg_ioport(entry->res)) resource_list_destroy_entry(entry); else entry->res->name = info->name; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 5589a6e2a023..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_resources); * @ares: Input ACPI resource object. * @types: Valid resource types of IORESOURCE_XXX * - * This is a hepler function to support acpi_dev_get_resources(), which filters + * This is a helper function to support acpi_dev_get_resources(), which filters * ACPI resource objects according to resource types.
*/ int acpi_dev_filter_resource_type(struct acpi_resource *ares, -- cgit v1.2.3 From e8a4a2696fecb398b0288c43c0e0dbb91e265bb2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 4 May 2015 21:15:31 -0700 Subject: x86/spinlocks: Fix regression in spinlock contention detection A spinlock is regarded as contended when there is at least one waiter. Currently, the code that checks whether there are any waiters relies on the tail value being greater than the head. However, this is not true if tail reaches the max value and wraps back to zero, so arch_spin_is_contended() incorrectly returns 0 (not contended) when tail is smaller than head. The original code (before the regression) handled this case by casting the (tail - head) difference to an unsigned value. This change simply restores that behavior. Fixes: d6abfdb20223 ("x86/spinlocks/paravirt: Fix memory corruption on unlock") Signed-off-by: Tahsin Erdogan Cc: peterz@infradead.org Cc: Waiman.Long@hp.com Cc: borntraeger@de.ibm.com Cc: oleg@redhat.com Cc: raghavendra.kt@linux.vnet.ibm.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1430799331-20445-1-git-send-email-tahsin@google.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index cf87de3fc390..64b611782ef0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -169,7 +169,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock) struct __raw_tickets tmp = READ_ONCE(lock->tickets); tmp.head &= ~TICKET_SLOWPATH_FLAG; - return (tmp.tail - tmp.head) > TICKET_LOCK_INC; + return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } #define arch_spin_is_contended arch_spin_is_contended -- cgit v1.2.3 From a71dbdaa8ca2933391b08e0ae5567083e3af0892 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 4 May 2015 11:02:15 -0400 Subject: hypervisor/x86/xen: Unset X86_BUG_SYSRET_SS_ATTRS on Xen PV guests Commit 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") makes AMD processors set SS to __KERNEL_DS in __switch_to() to deal with cases when SS is NULL. This breaks Xen PV guests who do not want to load SS with __KERNEL_DS. Since the problem that the commit is trying to address would have to be fixed in the hypervisor (if it in fact exists under Xen) there is no reason to set the X86_BUG_SYSRET_SS_ATTRS flag for PV VCPUs here. This can be easily achieved by adding an x86_hyper_xen_hvm.set_cpu_features op which will clear this flag. (And since this structure is no longer HVM-specific we should do some renaming).
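Returning to the ticket-lock fix a little further up: the cast matters because head and tail are narrow counters that wrap, so "how many tickets are outstanding" must be computed in modular arithmetic. A standalone C sketch (the typedef mirrors __ticket_t; the values are made up to force a wrap):

#include <stdio.h>

typedef unsigned char ticket_t;	/* stand-in for a narrow __ticket_t */
#define TICKET_LOCK_INC 1

/* Contended iff more than one ticket separates tail from head; the
 * cast keeps the subtraction modulo 2^8, as in the patched code. */
static int is_contended(ticket_t head, ticket_t tail)
{
	return (ticket_t)(tail - head) > TICKET_LOCK_INC;
}

int main(void)
{
	ticket_t head = 254, tail = 1;	/* tail wrapped past zero */

	/* Integer-promoted subtraction goes negative and compares false: */
	printf("without cast: %d\n", (tail - head) > TICKET_LOCK_INC);	/* 0 */
	/* Modular subtraction sees the real distance of 3 tickets: */
	printf("with cast: %d\n", is_contended(head, tail));		/* 1 */
	return 0;
}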
Signed-off-by: Boris Ostrovsky Reported-by: Sander Eikelenboom Signed-off-by: David Vrabel --- arch/x86/include/asm/hypervisor.h | 2 +- arch/x86/kernel/cpu/hypervisor.c | 4 ++-- arch/x86/xen/enlighten.c | 27 ++++++++++++++++++--------- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index e42f758a0fbd..055ea9941dd5 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_xen; extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 36ce402a3fa5..d820d8eae96b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -27,8 +27,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = { -#ifdef CONFIG_XEN_PVHVM - &x86_hyper_xen_hvm, +#ifdef CONFIG_XEN + &x86_hyper_xen, #endif &x86_hyper_vmware, &x86_hyper_ms_hyperv, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94578efd3067..46957ead3060 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = { static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); @@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); } +#endif static bool xen_nopv = false; static __init int xen_parse_nopv(char *arg) @@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); -static uint32_t __init xen_hvm_platform(void) +static uint32_t __init xen_platform(void) { if (xen_nopv) return 0; - if (xen_pv_domain()) - return 0; - return xen_cpuid_base(); } @@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, +#endif .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); -#endif +EXPORT_SYMBOL(x86_hyper_xen); -- cgit v1.2.3 From de71ad2c97862eae1516aa36528cc3b317c17b2f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 4 May 2015 15:16:44 -0300 Subject: x86: Make cpu_tss available to external modules Commit 75182b1632 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()") changed current_thread_info to use this_cpu_sp0, and indirectly made it rely on init_tss which was exported with EXPORT_PER_CPU_SYMBOL_GPL. As a result some macros and inline functions such as set/get_fs, test_thread_flag and variants have been made unusable for external modules. 
Make cpu_tss exported with EXPORT_PER_CPU_SYMBOL so that these functions are accessible again, as they were previously. Signed-off-by: Marc Dionne Acked-by: Andy Lutomirski Link: http://lkml.kernel.org/r/1430763404-21221-1-git-send-email-marc.dionne@your-file-system.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8213da62b1b7..bfc99b3b6522 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif }; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); -- cgit v1.2.3 From d9ee948d82203811a545ba26b0172fce4970d1dc Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 17 Dec 2014 18:05:29 -0800 Subject: x86/asm: Use -mskip-rax-setup if supported GCC 5 added a compiler option, -mskip-rax-setup, for x86-64. It skips setting up the RAX register when SSE is disabled and there are no variable arguments passed in vector registers. (According to the x86_64 ABI, %al is used as a hidden register containing the number of vector registers used). Since the kernel doesn't pass vector registers to functions with variable arguments, this option can be used to optimize the x86-64 kernel. This GCC feature was suggested by Rasmus Villemoes . This is the corresponding kernel change using it. For kernel v3.17: text data bss dec filename 11455921 2204048 5853184 19513153 vmlinux #with -mskip-rax-setup 11480079 2204048 5853184 19537311 vmlinux For Kernel v4.0+ - custom config: text data bss dec filename 10231778 3479800 16617472 30329050 vmlinux-gcc5+-mskip-rax-setup 10268797 3547448 16621568 30437813 vmlinux Signed-off-by: H.J. Lu Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rasmus Villemoes Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5ba2d9ce82dc..40af1bac2b7d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -84,6 +84,9 @@ else # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) + # Use -mskip-rax-setup if supported. + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) -- cgit v1.2.3 From c88d47480d300eaad80c213d50c9bf6077fc49bc Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Mon, 27 Apr 2015 08:10:41 -0700 Subject: x86/fpu: Always restore_xinit_state() when use_eager_cpu() The following commit: f893959b0898 ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()") removed drop_init_fpu() usage from flush_thread(). This seems to break things for me - the Go 1.4 test suite fails all over the place with floating point comparision errors (offending commit found through bisection). The functional change was that flush_thread() after this commit only calls restore_init_xstate() when both use_eager_fpu() and !used_math() are true. 
drop_init_fpu() (now fpu_reset_state()) calls restore_init_xstate() regardless of whether current used_math() - apply the same logic here. Switch used_math() -> tsk_used_math(tsk) to consistently use the grabbed tsk instead of current, like in the rest of flush_thread(). Tested-by: Dave Hansen Signed-off-by: Bobby Powers Signed-off-by: Borislav Petkov Acked-by: Oleg Nesterov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Pekka Riikonen Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Suresh Siddha Cc: Thomas Gleixner Fixes: f893959b ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()") Link: http://lkml.kernel.org/r/1430147441-9820-1-git-send-email-bobbypowers@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bfc99b3b6522..6e338e3b1dc0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -156,11 +156,13 @@ void flush_thread(void) /* FPU state will be reallocated lazily at the first use. */ drop_fpu(tsk); free_thread_xstate(tsk); - } else if (!used_math()) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); + } else { + if (!tsk_used_math(tsk)) { + /* kthread execs. TODO: cleanup this horror. */ + if (WARN_ON(init_fpu(tsk))) + force_sig(SIGKILL, tsk); + user_fpu_begin(); + } restore_init_xstate(); } } -- cgit v1.2.3 From 5b673a48c54594108aec368014efc7334743f06a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Apr 2015 16:40:45 +0200 Subject: x86/alternatives: Document macros Add some text to the macro magic for future reference and against failing human memory. Requested-by: Ingo Molnar Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative-asm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index bdf02eeee765..e7636bac7372 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -18,6 +18,12 @@ .endm #endif +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ .macro altinstruction_entry orig alt feature orig_len alt_len pad_len .long \orig - . .long \alt - . @@ -27,6 +33,12 @@ .byte \pad_len .endm +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ .macro ALTERNATIVE oldinstr, newinstr, feature 140: \oldinstr @@ -55,6 +67,12 @@ */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @newinstr2.
+ */ .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 140: \oldinstr -- cgit v1.2.3 From 8746515d7f04c9ea94cf43e2db1fd2cfca93276d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 24 Apr 2015 10:16:40 +0100 Subject: xen: Add __GFP_DMA flag when xen_swiotlb_init gets free pages on ARM Make sure that xen_swiotlb_init allocates buffers that are DMA capable when at least one memblock is available below 4G. Otherwise we assume that all devices on the SoC can cope with >4G addresses. We do this on ARM and ARM64, where dom0 is mapped 1:1, so pfn == mfn in this case. No functional changes on x86. From: Chen Baozi Signed-off-by: Chen Baozi Signed-off-by: Stefano Stabellini Tested-by: Chen Baozi Acked-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel --- arch/arm/include/asm/xen/page.h | 1 + arch/arm/xen/mm.c | 15 +++++++++++++++ arch/x86/include/asm/xen/page.h | 5 +++++ drivers/xen/swiotlb-xen.c | 2 +- 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 2f7e6ff67d51..0b579b2f4e0e 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool xen_arch_need_swiotlb(struct device *dev, unsigned long pfn, unsigned long mfn); +unsigned long xen_get_swiotlb_free_pages(unsigned int order); #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 793551d15f1d..498325074a06 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,20 @@ #include #include +unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + struct memblock_region *reg; + gfp_t flags = __GFP_NOWARN; + + for_each_memblock(memory, reg) { + if (reg->base < (phys_addr_t)0xffffffff) { + flags |= __GFP_DMA; + break; + } + } + return __get_free_pages(flags, order); +} + enum dma_cache_op { DMA_UNMAP, DMA_MAP, diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 358dcd338915..c44a5d53e464 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } +static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + return __get_free_pages(__GFP_NOWARN, order); +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 810ad419e34c..4c549323c605 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -235,7 +235,7 @@ retry: #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order); + xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order); if (xen_io_tlb_start) break; order--; -- cgit v1.2.3 From dde74f2e4a4447ef838c57e407f7139de3df68cb Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 27 Apr 2015 15:21:51 +0200 Subject: x86/asm/entry/64: Tidy up JZ insns after TESTs After TESTs, use logically correct JZ/JNZ mnemonics instead of JE/JNE. This doesn't change code. Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1430140912-7960-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e952f6bf1d6d..8f8b22a361df 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -666,7 +666,7 @@ END(irq_entries_start) leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ testl $3, CS-RBP(%rsp) - je 1f + jz 1f SWAPGS 1: /* @@ -721,7 +721,7 @@ ret_from_intr: CFI_ADJUST_CFA_OFFSET RBP testl $3,CS(%rsp) - je retint_kernel + jz retint_kernel /* Interrupt came from user space */ GET_THREAD_INFO(%rcx) @@ -1310,7 +1310,7 @@ ENTRY(error_entry) SAVE_EXTRA_REGS 8 xorl %ebx,%ebx testl $3,CS+8(%rsp) - je error_kernelspace + jz error_kernelspace error_swapgs: SWAPGS error_sti: @@ -1361,7 +1361,7 @@ ENTRY(error_exit) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jnz retint_kernel LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi -- cgit v1.2.3 From 03335e95e27fc1f2b17b05b27342ad76986b3cf0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 27 Apr 2015 15:21:52 +0200 Subject: x86/asm/entry/64: Clean up usage of TEST insns By the nature of TEST operation, it is often possible to test a narrower part of the operand: "testl $3, mem" -> "testb $3, mem" This results in shorter insns, because TEST insn has no sign-extending byte-immediate forms unlike other ALU ops. text data bss dec hex filename 11674 0 0 11674 2d9a entry_64.o.before 11658 0 0 11658 2d8a entry_64.o Changes in object code: - f7 84 24 88 00 00 00 03 00 00 00 testl $0x3,0x88(%rsp) + f6 84 24 88 00 00 00 03 testb $0x3,0x88(%rsp) - f7 44 24 68 03 00 00 00 testl $0x3,0x68(%rsp) + f6 44 24 68 03 testb $0x3,0x68(%rsp) - f7 84 24 90 00 00 00 03 00 00 00 testl $0x3,0x90(%rsp) + f6 84 24 90 00 00 00 03 testb $0x3,0x90(%rsp) Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1430140912-7960-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8f8b22a361df..60705b032521 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -601,7 +601,7 @@ ENTRY(ret_from_fork) RESTORE_EXTRA_REGS - testl $3,CS(%rsp) # from kernel_thread? + testb $3, CS(%rsp) # from kernel_thread? /* * By the time we get here, we have no idea whether our pt_regs, @@ -665,7 +665,7 @@ END(irq_entries_start) leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ - testl $3, CS-RBP(%rsp) + testb $3, CS-RBP(%rsp) jz 1f SWAPGS 1: @@ -720,7 +720,7 @@ ret_from_intr: CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET RBP - testl $3,CS(%rsp) + testb $3, CS(%rsp) jz retint_kernel /* Interrupt came from user space */ @@ -968,7 +968,7 @@ ENTRY(\sym) .if \paranoid .if \paranoid == 1 CFI_REMEMBER_STATE - testl $3, CS(%rsp) /* If coming from userspace, switch */ + testb $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. 
*/ .endif call paranoid_entry @@ -1309,7 +1309,7 @@ ENTRY(error_entry) SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 xorl %ebx,%ebx - testl $3,CS+8(%rsp) + testb $3, CS+8(%rsp) jz error_kernelspace error_swapgs: SWAPGS @@ -1606,7 +1606,6 @@ end_repeat_nmi: je 1f movq %r12, %cr2 1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: -- cgit v1.2.3 From 2a4e90b18c256d52a7f3f77d58114f6d4e4a7f9f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 May 2015 12:26:02 +0200 Subject: x86: Force inlining of atomic ops With both gcc 4.7.2 and 4.9.2, sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined: $ nm --size-sort vmlinux | grep -iF ' t ' | uniq -c | grep -v '^ *1 ' | sort -rn 473 000000000000000b t spin_unlock_irqrestore 449 000000000000005f t rcu_read_unlock 355 0000000000000009 t atomic_inc <== THIS 353 000000000000006e t rcu_read_lock 350 0000000000000075 t rcu_read_lock_sched_held 291 000000000000000b t spin_unlock 266 0000000000000019 t arch_local_irq_restore 215 000000000000000b t spin_lock 180 0000000000000011 t kzalloc 165 0000000000000012 t list_add_tail 161 0000000000000019 t arch_local_save_flags 153 0000000000000016 t test_and_set_bit 134 000000000000000b t spin_unlock_irq 134 0000000000000009 t atomic_dec <== THIS 130 000000000000000b t spin_unlock_bh 122 0000000000000010 t brelse 120 0000000000000016 t test_and_clear_bit 120 000000000000000b t spin_lock_irq 119 000000000000001e t get_dma_ops 117 0000000000000053 t cpumask_next 116 0000000000000036 t kref_get 114 000000000000001a t schedule_work 106 000000000000000b t spin_lock_bh 103 0000000000000019 t arch_local_irq_disable ... Note sizes of marked functions. They are merely 9 bytes long! Selecting function with 'atomic' in their names: 355 0000000000000009 t atomic_inc 134 0000000000000009 t atomic_dec 98 0000000000000014 t atomic_dec_and_test 31 000000000000000e t atomic_add_return 27 000000000000000a t atomic64_inc 26 000000000000002f t kmap_atomic 24 0000000000000009 t atomic_add 12 0000000000000009 t atomic_sub 10 0000000000000021 t __atomic_add_unless 10 000000000000000a t atomic64_add 5 000000000000001f t __atomic_add_unless.constprop.7 5 000000000000000a t atomic64_dec 4 000000000000001f t __atomic_add_unless.constprop.18 4 000000000000001f t __atomic_add_unless.constprop.12 4 000000000000001f t __atomic_add_unless.constprop.10 3 000000000000001f t __atomic_add_unless.constprop.13 3 0000000000000011 t atomic64_add_return 2 000000000000001f t __atomic_add_unless.constprop.9 2 000000000000001f t __atomic_add_unless.constprop.8 2 000000000000001f t __atomic_add_unless.constprop.6 2 000000000000001f t __atomic_add_unless.constprop.5 2 000000000000001f t __atomic_add_unless.constprop.3 2 000000000000001f t __atomic_add_unless.constprop.22 2 000000000000001f t __atomic_add_unless.constprop.14 2 000000000000001f t __atomic_add_unless.constprop.11 2 000000000000001e t atomic_dec_if_positive 2 0000000000000014 t atomic_inc_and_test 2 0000000000000011 t atomic_add_return.constprop.4 2 0000000000000011 t atomic_add_return.constprop.17 2 0000000000000011 t atomic_add_return.constprop.16 2 000000000000000d t atomic_inc.constprop.4 2 000000000000000c t atomic_cmpxchg This patch fixes this for x86 atomic ops via s/inline/__always_inline/. 
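As an aside, the effect is easy to reproduce outside the kernel. Below is a minimal user-space sketch, not the kernel's atomic.h; my_always_inline and my_atomic_inc are hypothetical names standing in for __always_inline and atomic_inc():

#include <stdio.h>

/* GCC's always_inline attribute is what the kernel's __always_inline expands to */
#define my_always_inline inline __attribute__((always_inline))

static my_always_inline void my_atomic_inc(volatile int *counter)
{
	/* same insn the kernel's atomic_inc() emits on SMP x86 */
	asm volatile("lock incl %0" : "+m" (*counter));
}

int main(void)
{
	int v = 0;

	my_atomic_inc(&v);
	printf("v = %d\n", v);	/* prints v = 1 */
	return 0;
}

With plain "inline", gcc is free to (and, as the nm listing above shows, sometimes does) emit an out-of-line copy per object file; the attribute removes that freedom.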
This decreases allyesconfig kernel by about 25k: text data bss dec hex filename 82399481 22255416 20627456 125282353 777a831 vmlinux.before 82375570 22255544 20627456 125258570 7774b4a vmlinux Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1431080762-17797-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 75a9ee8529f3..e9168955c42f 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,7 @@ * * Atomically reads the value of @v. */ -static inline int atomic_read(const atomic_t *v) +static __always_inline int atomic_read(const atomic_t *v) { return ACCESS_ONCE((v)->counter); } @@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v) * * Atomically sets the value of @v to @i. */ -static inline void atomic_set(atomic_t *v, int i) +static __always_inline void atomic_set(atomic_t *v, int i) { v->counter = i; } @@ -126,7 +126,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic_inc_and_test(atomic_t *v) +static __always_inline int atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline int atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } @@ -164,7 +164,7 @@ static __always_inline int atomic_add_return(int i, atomic_t *v) * * Atomically subtracts @i from @v and returns @v - @i */ -static inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int atomic_sub_return(int i, atomic_t *v) { return atomic_add_return(-i, v); } @@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } @@ -213,7 +213,7 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) * Atomically adds 1 to @v * Returns the new value of @u */ -static inline short int atomic_inc_short(short int *v) +static __always_inline short int atomic_inc_short(short int *v) { asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); return *v; -- cgit v1.2.3 From 63332a8455d8310b77d38779c6c21a660a8d9feb Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:33 +0200 Subject: x86/entry: Stop using PER_CPU_VAR(kernel_stack) PER_CPU_VAR(kernel_stack) is redundant: - On the 64-bit build, we can use PER_CPU_VAR(cpu_tss + TSS_sp0). - On the 32-bit build, we can use PER_CPU_VAR(cpu_current_top_of_stack). PER_CPU_VAR(kernel_stack) will be deleted by a separate change. 
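The redundancy is easy to see in C terms. A sketch, assuming THREAD_SIZE-aligned task stacks as on x86 (ti_from_top is a hypothetical helper name; the GET_THREAD_INFO asm macro performs the same subtraction):

/*
 * thread_info lives at the bottom of the task stack, so any value that
 * points at the top of the current stack determines it completely -
 * which is why a second per-cpu copy such as kernel_stack adds nothing.
 */
static inline struct thread_info *ti_from_top(unsigned long top_of_stack)
{
	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
}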
Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/thread_info.h | 8 +++++++- arch/x86/kernel/entry_64.S | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++-- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 2ab0f7182df3..1b1330c07971 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -346,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b4bdec3e9523..d656a363e1eb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -198,9 +198,15 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ /* Load thread_info address into "reg" */ +#ifdef CONFIG_X86_32 #define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; +#else +#define GET_THREAD_INFO(reg) \ + _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \ + _ASM_SUB $(THREAD_SIZE),reg ; +#endif /* * ASM operand which evaluates to a 'thread_info' address of diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7423e3e2f5c5..c13b86b40176 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index a2cabb8bd6bf..5aa7ec607b9e 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ ENTRY(xen_sysret64) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -72,7 +73,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) -- cgit v1.2.3 From fed7c3f0f750f225317828d691e9eb76eec887b3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:34 +0200 Subject: x86/entry: Remove unused 'kernel_stack' per-cpu variable Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/cpu/common.c | 4 ---- arch/x86/kernel/process_32.c | 5 +---- arch/x86/kernel/process_64.c | 3 --- arch/x86/kernel/smpboot.c | 2 -- 5 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d656a363e1eb..472288962c99 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -177,8 +177,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -DECLARE_PER_CPU(unsigned long, kernel_stack); - static inline struct thread_info *current_thread_info(void) { return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..6bec0b55863e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..a99900cedc22 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0, kernel_stack, and current_top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..82134506faa8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE); - /* * Now maybe reload the debug registers and handle I/O bitmaps */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..023cccf5a4ae 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -792,8 +792,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; } /* -- cgit v1.2.3 From 3a23208e69679597e767cf3547b1a30dd845d9b5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:35 +0200 Subject: x86/entry: Define 'cpu_current_top_of_stack' for 64-bit code 32-bit code has PER_CPU_VAR(cpu_current_top_of_stack). 64-bit code uses somewhat more obscure: PER_CPU_VAR(cpu_tss + TSS_sp0). 
Define the 'cpu_current_top_of_stack' macro on CONFIG_X86_64 as well so that the PER_CPU_VAR(cpu_current_top_of_stack) expression can be used in both 32-bit and 64-bit code. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-3-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 4 ++-- arch/x86/include/asm/thread_info.h | 10 ++++------ arch/x86/kernel/entry_64.S | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++-- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 1b1330c07971..63450a596800 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -113,7 +113,7 @@ ENTRY(ia32_sysenter_target) * it is too small to ever cause noticeable irq latency. */ SWAPGS_UNSAFE_STACK - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -346,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 472288962c99..225ee545e1a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -195,16 +195,14 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +#endif + /* Load thread_info address into "reg" */ -#ifdef CONFIG_X86_32 #define GET_THREAD_INFO(reg) \ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; -#else -#define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; -#endif /* * ASM operand which evaluates to a 'thread_info' address of diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c13b86b40176..09c3f9e0e07e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5aa7ec607b9e..04529e620559 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -54,7 +55,7 @@ ENTRY(xen_sysret64) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -73,7 +74,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) 
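On the C side the same unification already exists as the current_top_of_stack() helper; a sketch along those lines (the exact kernel body may differ slightly):

static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
	/* sp0 sits inside the per-cpu TSS; TSS_sp0 is its asm-offsets constant */
	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
	return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}

Either way, the asm macro and the C helper now name the same per-cpu slot.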
-- cgit v1.2.3 From c5bde906d2916d214d78cd8b67d665bf09867033 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:50 -0400 Subject: x86/irq: Merge irq_regs & irq_stat Move irq_regs and irq_stat definitions to irq.c. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 6 ++++++ arch/x86/kernel/irq_32.c | 6 ------ arch/x86/kernel/irq_64.c | 6 ------ 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e5952c225532..fe2ed8bb507b 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -22,6 +22,12 @@ #define CREATE_TRACE_POINTS #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + atomic_t irq_err_count; /* Function pointer for generic interrupt vector handling */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f9fd86a7fcc7..cd74f5978ab9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -21,12 +21,6 @@ #include -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - #ifdef CONFIG_DEBUG_STACKOVERFLOW int sysctl_panic_on_stackoverflow __read_mostly; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 394e643d7830..bc4604e500a3 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -20,12 +20,6 @@ #include #include -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - int sysctl_panic_on_stackoverflow; /* -- cgit v1.2.3 From c6e692f95dacddff5f3607717fb2246c60bbb714 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:51 -0400 Subject: x86/asm/entry/irq: Remove unused invalidate_interrupt prototypes The invalidate_interrupt* functions no longer exist. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..014c6382ffce 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -36,40 +36,6 @@ extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void invalidate_interrupt(void); -extern asmlinkage void invalidate_interrupt0(void); -extern asmlinkage void invalidate_interrupt1(void); -extern asmlinkage void invalidate_interrupt2(void); -extern asmlinkage void invalidate_interrupt3(void); -extern asmlinkage void invalidate_interrupt4(void); -extern asmlinkage void invalidate_interrupt5(void); -extern asmlinkage void invalidate_interrupt6(void); -extern asmlinkage void invalidate_interrupt7(void); -extern asmlinkage void invalidate_interrupt8(void); -extern asmlinkage void invalidate_interrupt9(void); -extern asmlinkage void invalidate_interrupt10(void); -extern asmlinkage void invalidate_interrupt11(void); -extern asmlinkage void invalidate_interrupt12(void); -extern asmlinkage void invalidate_interrupt13(void); -extern asmlinkage void invalidate_interrupt14(void); -extern asmlinkage void invalidate_interrupt15(void); -extern asmlinkage void invalidate_interrupt16(void); -extern asmlinkage void invalidate_interrupt17(void); -extern asmlinkage void invalidate_interrupt18(void); -extern asmlinkage void invalidate_interrupt19(void); -extern asmlinkage void invalidate_interrupt20(void); -extern asmlinkage void invalidate_interrupt21(void); -extern asmlinkage void invalidate_interrupt22(void); -extern asmlinkage void invalidate_interrupt23(void); -extern asmlinkage void invalidate_interrupt24(void); -extern asmlinkage void invalidate_interrupt25(void); -extern asmlinkage void invalidate_interrupt26(void); -extern asmlinkage void invalidate_interrupt27(void); -extern asmlinkage void invalidate_interrupt28(void); -extern asmlinkage void invalidate_interrupt29(void); -extern asmlinkage void invalidate_interrupt30(void); -extern asmlinkage void invalidate_interrupt31(void); - extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); @@ -178,7 +144,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); extern __visible void smp_reschedule_interrupt(struct pt_regs *); extern __visible void smp_call_function_interrupt(struct pt_regs *); extern __visible void smp_call_function_single_interrupt(struct pt_regs *); -extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif extern char irq_entries_start[]; -- cgit v1.2.3 From 51bb92843edcba5a58138cad25ced97923048add Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:52 -0400 Subject: x86/asm/entry: Remove SYSCALL_VECTOR Use IA32_SYSCALL_VECTOR for both compat and native. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 3 --- arch/x86/kernel/traps.c | 4 ++-- arch/x86/lguest/boot.c | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..07f27926d473 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -47,9 +47,6 @@ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 -#ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 -#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..5e0791f9d3dc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -997,8 +997,8 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); + set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..cab9aaa7802c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -90,7 +90,7 @@ struct lguest_data lguest_data = { .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ - .syscall_vec = SYSCALL_VECTOR, + .syscall_vec = IA32_SYSCALL_VECTOR, }; /*G:037 @@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void) for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); - if (i != SYSCALL_VECTOR) + if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); } -- cgit v1.2.3 From 8b455e6577f325289cf2d1b20f493b2fe5c6c316 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:53 -0400 Subject: x86/asm/entry/irq: Clean up IRQn_VECTOR macros Since the ISA irqs are in a single block, use ISA_IRQ_VECTOR(irq) instead of individual macros. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-5-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 18 +----------------- arch/x86/kernel/apic/io_apic.c | 4 ++-- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/i8259.c | 8 ++++---- arch/x86/kernel/irqinit.c | 4 ++-- 5 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 07f27926d473..117db96ad5fb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -52,23 +52,7 @@ * Vectors 0x30-0x3f are used for ISA interrupts. 
* round up to the next 16-vector boundary */ -#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) - -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..e01e4117188a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -258,11 +258,11 @@ int __init arch_early_ioapic_init(void) /* * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. + * ISA_IRQ_VECTOR(irq) for all cpu's. */ for (i = 0; i < nr_legacy_irqs(); i++) { cfg = alloc_irq_and_cfg_at(i, node); - cfg->vector = IRQ0_VECTOR + i; + cfg->vector = ISA_IRQ_VECTOR(i); cpumask_setall(cfg->domain); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..82d44c314a3f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -314,7 +314,7 @@ void setup_vector_irq(int cpu) * legacy vector to irq mapping: */ for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq; __setup_vector_irq(cpu); } diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index e7cc5370cd2f..16cb827a5b27 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi) */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */ + outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); @@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi) outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */ + outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..dc1e08d23552 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) int i; /* - * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15. * If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If * these IRQ's are handled by more mordern controllers like IO-APIC, @@ -94,7 +94,7 @@ void __init init_IRQ(void) * irq's migrate etc. 
*/ for (i = 0; i < nr_legacy_irqs(); i++) - per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i; x86_init.irqs.intr_init(); } -- cgit v1.2.3 From f21262b8e092a770e39fbd405cc18a0247c3af68 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 May 2015 10:15:46 +0200 Subject: x86/alternatives: Switch AMD F15h and later to the P6 NOPs Software optimization guides for both F15h and F16h cite those NOPs as the optimal ones. A microbenchmark confirms that actually even older families are better with the single-insn NOPs so switch to them for the alternatives. Cycles count below includes the loop overhead of the measurement but that overhead is the same with all runs. F10h, revE: ----------- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 288.212282 cycles 66 90 288.220840 cycles 66 66 90 288.219447 cycles 66 66 66 90 288.223204 cycles 66 66 90 66 90 571.393424 cycles 66 66 90 66 66 90 571.374919 cycles 66 66 66 90 66 66 90 572.249281 cycles 66 66 66 90 66 66 66 90 571.388651 cycles P6: 90 288.214193 cycles 66 90 288.225550 cycles 0f 1f 00 288.224441 cycles 0f 1f 40 00 288.225030 cycles 0f 1f 44 00 00 288.233558 cycles 66 0f 1f 44 00 00 324.792342 cycles 0f 1f 80 00 00 00 00 325.657462 cycles 0f 1f 84 00 00 00 00 00 430.246643 cycles F14h: ---- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 510.404890 cycles 66 90 510.432117 cycles 66 66 90 510.561858 cycles 66 66 66 90 510.541865 cycles 66 66 90 66 90 1014.192782 cycles 66 66 90 66 66 90 1014.226546 cycles 66 66 66 90 66 66 90 1014.334299 cycles 66 66 66 90 66 66 66 90 1014.381205 cycles P6: 90 510.436710 cycles 66 90 510.448229 cycles 0f 1f 00 510.545100 cycles 0f 1f 40 00 510.502792 cycles 0f 1f 44 00 00 510.589517 cycles 66 0f 1f 44 00 00 510.611462 cycles 0f 1f 80 00 00 00 00 511.166794 cycles 0f 1f 84 00 00 00 00 00 511.651641 cycles F15h: ----- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 243.128396 cycles 66 90 243.129883 cycles 66 66 90 243.131631 cycles 66 66 66 90 242.499324 cycles 66 66 90 66 90 481.829083 cycles 66 66 90 66 66 90 481.884413 cycles 66 66 66 90 66 66 90 481.851446 cycles 66 66 66 90 66 66 66 90 481.409220 cycles P6: 90 243.127026 cycles 66 90 243.130711 cycles 0f 1f 00 243.122747 cycles 0f 1f 40 00 242.497617 cycles 0f 1f 44 00 00 245.354461 cycles 66 0f 1f 44 00 00 361.930417 cycles 0f 1f 80 00 00 00 00 362.844944 cycles 0f 1f 84 00 00 00 00 00 480.514948 cycles F16h: ----- Running NOP tests, 1000 NOPs x 1000000 repetitions K8: 90 507.793298 cycles 66 90 507.789636 cycles 66 66 90 507.826490 cycles 66 66 66 90 507.859075 cycles 66 66 90 66 90 1008.663129 cycles 66 66 90 66 66 90 1008.696259 cycles 66 66 66 90 66 66 90 1008.692517 cycles 66 66 66 90 66 66 66 90 1008.755399 cycles P6: 90 507.795232 cycles 66 90 507.794761 cycles 0f 1f 00 507.834901 cycles 0f 1f 40 00 507.822629 cycles 0f 1f 44 00 00 507.838493 cycles 66 0f 1f 44 00 00 507.908597 cycles 0f 1f 80 00 00 00 00 507.946417 cycles 0f 1f 84 00 00 00 00 00 507.954960 cycles Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431332153-18566-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..b0932c4341b3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -227,6 +227,15 @@ void __init arch_init_ideal_nops(void) #endif } break; + + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 > 0xf) { + ideal_nops = p6_nops; + return; + } + + /* fall through */ + default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; -- cgit v1.2.3
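For reference, measurements of the sort tabulated above can be reproduced with a small user-space harness. A hedged sketch, not the actual tool used for the tables: it fills executable memory with one-byte 0x90 NOPs and times calls with an unserialized RDTSC, which is crude but sufficient for the relative comparison:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static inline uint64_t rdtsc(void)
{
	uint32_t lo, hi;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	unsigned char *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	void (*fn)(void);
	uint64_t t0;
	int i;

	if (buf == MAP_FAILED)
		return 1;

	memset(buf, 0x90, 1000);	/* 1000 x one-byte NOP */
	buf[1000] = 0xc3;		/* near RET */
	fn = (void (*)(void))buf;

	t0 = rdtsc();
	for (i = 0; i < 1000000; i++)
		fn();
	printf("%f cycles\n", (double)(rdtsc() - t0) / 1000000.0);
	return 0;
}

Filling the buffer with repeated multi-byte sequences such as 0f 1f 00 instead of the memset() (e.g. via memcpy in a loop) reproduces the P6 rows; the loop overhead is constant across patterns, as noted in the commit message above.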