author | Thomas Gleixner <tglx@linutronix.de> | 2015-05-19 17:12:32 +0300
committer | Thomas Gleixner <tglx@linutronix.de> | 2015-05-19 17:12:32 +0300
commit | c3b5d3cea508d2c8ff493ef18c45a9cc58fb7015 (patch)
tree | a80672ee82fcc3d9c8d486e53731eb19cd968eb0 /arch/x86
parent | daa67b4b70568a07fef3cffacb2055891bf42ddb (diff)
parent | e26081808edadfd257c6c9d81014e3b25e9a6118 (diff)
download | linux-c3b5d3cea508d2c8ff493ef18c45a9cc58fb7015.tar.xz
Merge branch 'linus' into timers/core
Make sure the upstream fixes are applied before adding further
modifications.
Diffstat (limited to 'arch/x86')
42 files changed, 311 insertions(+), 311 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6049d587599e..226d5696e1d1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 20028da8ae18..72484a645f05 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -43,10 +43,6 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally N here, unless you want to debug such a crash. -config EARLY_PRINTK_INTEL_MID - bool "Early printk for Intel MID platform support" - depends on EARLY_PRINTK && X86_INTEL_MID - config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" depends on EARLY_PRINTK && PCI diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ef17683484e9..48304b89b601 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1109,6 +1109,8 @@ struct boot_params *make_boot_params(struct efi_config *c) if (!cmdline_ptr) goto fail; hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; + /* Fill in upper bits of command line address, NOP on 32 bit */ + boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; hdr->ramdisk_image = 0; hdr->ramdisk_size = 0; diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index a4771dcd1fcf..1f20b35d8573 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -79,7 +79,7 @@ NUM_BLKS = %rdx c = %rcx d = %r8 e = %rdx -y3 = %rdi +y3 = %rsi TBL = %rbp diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..72bf2680f819 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -427,6 +427,13 @@ sysretl_from_sys_call: * cs and ss are loaded from MSRs. * (Note: 32bit->32bit SYSRET is different: since r11 * does not exist, it merely sets eflags.IF=1). + * + * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss + * descriptor is not reinitialized. This means that we must + * avoid SYSRET with SS == NULL, which could happen if we schedule, + * exit the kernel, and re-enter using an interrupt vector. (All + * interrupt entries on x86_64 set SS to NULL.) We prevent that + * from happening by reloading SS in __switch_to. 
*/ USERGS_SYSRET32 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ee9b94d9921..3d6606fb97d0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -265,6 +265,7 @@ #define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index e42f758a0fbd..055ea9941dd5 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_xen; extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 705d35708a50..7c5af123bdbd 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options; #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -extern struct console early_hsu_console; -extern void hsu_early_console_init(const char *); - extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index e2d4a4afa8c3..3bbc07a57a31 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -20,13 +20,10 @@ extern unsigned long switcher_addr; /* Found in switcher.S */ extern unsigned long default_idt_entries[]; -/* Declarations for definitions in lguest_guest.S */ -extern char lguest_noirq_start[], lguest_noirq_end[]; +/* Declarations for definitions in arch/x86/lguest/head_32.S */ +extern char lguest_noirq_iret[]; extern const char lgstart_cli[], lgend_cli[]; -extern const char lgstart_sti[], lgend_sti[]; -extern const char lgstart_popf[], lgend_popf[]; extern const char lgstart_pushf[], lgend_pushf[]; -extern const char lgstart_iret[], lgend_iret[]; extern void lguest_iret(void); extern void lguest_init(void); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc07d496..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h index 460b84f64556..8378b8c9109c 100644 --- a/arch/x86/include/asm/serial.h +++ b/arch/x86/include/asm/serial.h @@ -12,11 +12,11 @@ /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_DETECT_IRQ -# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | ASYNC_AUTO_IRQ) +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | 
UPF_AUTO_IRQ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | UPF_AUTO_IRQ) #else -# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | 0 ) -# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | 0 ) +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | 0 ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | 0 ) #endif #define SERIAL_PORT_DFNS \ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index cf87de3fc390..64b611782ef0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -169,7 +169,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock) struct __raw_tickets tmp = READ_ONCE(lock->tickets); tmp.head &= ~TICKET_SLOWPATH_FLAG; - return (tmp.tail - tmp.head) > TICKET_LOCK_INC; + return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } #define arch_spin_is_contended arch_spin_is_contended diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 358dcd338915..c44a5d53e464 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } +static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + return __get_free_pages(__GFP_NOWARN, order); +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 90c458e66e13..ce6068dbcfbc 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -225,6 +225,8 @@ #define HV_STATUS_INVALID_HYPERCALL_CODE 2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 #define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 typedef struct _HV_REFERENCE_TSC_PAGE { diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 803b684676ff..dbe76a14c3c9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fd470ebf924e..e4cf63301ff4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + + /* AMD CPUs don't reset SS attributes on SYSRET */ + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 36ce402a3fa5..d820d8eae96b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -27,8 +27,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = { -#ifdef CONFIG_XEN_PVHVM - &x86_hyper_xen_hvm, +#ifdef CONFIG_XEN + &x86_hyper_xen, #endif &x86_hyper_vmware, &x86_hyper_ms_hyperv, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 219d3fb423a1..3998131d1a68 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ 
-1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, - [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, @@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(OP_READ) ] = { /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ @@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -2533,34 +2532,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -static __initconst const struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, - .format_attrs = intel_arch_formats_attr, - .events_sysfs_show = intel_event_sysfs_show, -}; - struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; @@ -2743,6 +2714,44 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static __initconst const struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32-bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL<<31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + /* + * Virtual (or funny metal) CPU can define x86_pmu.extra_regs + * together with PMU version 1 and thus be using core_pmu with + * shared_regs. We need following callbacks here to allocate + * it properly. 
+ */ + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 10190c044a7e..5cbd4e64feb5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -721,6 +721,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 61: /* Broadwell */ rapl_cntr_mask = RAPL_IDX_HSW; rapl_pmu_events_group.attrs = rapl_events_hsw_attr; break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015b755c..4562e9e22c60 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,13 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 49ff55ef9b26..89427d8d4fc5 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_INTEL_MID - if (!strncmp(buf, "hsu", 3)) { - hsu_early_console_init(buf + 3); - early_console_register(&early_hsu_console, keep); - } -#endif #ifdef CONFIG_EARLY_PRINTK_EFI if (!strncmp(buf, "efi", 3)) early_console_register(&early_efi_console, keep); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c7b238494b31..02c2eff7478d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,15 @@ system_call_fastpath: * rflags from r11 (but RF and VM bits are forced to 0), * cs and ss are loaded from MSRs. * Restoration of rflags re-enables interrupts. + * + * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss + * descriptor is not reinitialized. This means that we should + * avoid SYSRET with SS == NULL, which could happen if we schedule, + * exit the kernel, and re-enter using an interrupt vector. (All + * interrupt entries on x86_64 set SS to NULL.) 
We prevent that + * from happening by reloading SS in __switch_to. (Actually + * detecting the failure in 64-bit userspace is tricky but can be + * done.) */ USERGS_SYSRET64 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8213da62b1b7..6e338e3b1dc0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif }; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); @@ -156,11 +156,13 @@ void flush_thread(void) /* FPU state will be reallocated lazily at the first use. */ drop_fpu(tsk); free_thread_xstate(tsk); - } else if (!used_math()) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); + } else { + if (!tsk_used_math(tsk)) { + /* kthread execs. TODO: cleanup this horror. */ + if (WARN_ON(init_fpu(tsk))) + force_sig(SIGKILL, tsk); + user_fpu_begin(); + } restore_init_xstate(); } } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4baaa972f52a..ddfdbf74f174 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); + if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { + /* + * AMD CPUs have a misfeature: SYSRET sets the SS selector but + * does not update the cached descriptor. As a result, if we + * do SYSRET while SS is NULL, we'll end up in user mode with + * SS apparently equal to __USER_DS but actually unusable. + * + * The straightforward workaround would be to fix it up just + * before SYSRET, but that would slow down the system call + * fast paths. Instead, we ensure that SS is never NULL in + * system call context. We do this by replacing NULL SS + * selectors at every context switch. SYSCALL sets up a valid + * SS, so the only way to get NULL is to re-enter the kernel + * from CPL 3 through an interrupt. Since that can't happen + * in the same task as a running syscall, we are guaranteed to + * context switch between every interrupt vector entry and a + * subsequent SYSRET. + * + * We read SS first because SS reads are much faster than + * writes. Out of caution, we force SS to __KERNEL_DS even if + * it previously had a different non-NULL value. 
+ */ + unsigned short ss_sel; + savesegment(ss, ss_sel); + if (ss_sel != __KERNEL_DS) + loadsegment(ss, __KERNEL_DS); + } + return prev_p; } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 6eb5c20ee373..d090ecf08809 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev) if (r) return r; - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); path_put(&path); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d67206a7b99a..629af0f1c5c4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, unsigned long bitmap = 1; struct kvm_lapic **dst; int i; - bool ret = false; - bool x2apic_ipi = src && apic_x2apic_mode(src); + bool ret, x2apic_ipi; *r = -1; @@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (irq->shorthand) return false; + x2apic_ipi = src && apic_x2apic_mode(src); if (irq->dest_id == (x2apic_ipi ? 
X2APIC_BROADCAST : APIC_BROADCAST)) return false; + ret = true; rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (!map) + if (!map) { + ret = false; goto out; - - ret = true; + } if (irq->dest_mode == APIC_DEST_PHYSICAL) { if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 146f295ee322..d43867c33bc4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, pfn = spte_to_pfn(*sptep); /* - * Only EPT supported for now; otherwise, one would need to - * find out efficiently whether the guest page tables are - * also using huge pages. + * We cannot do huge page mapping for indirect shadow pages, + * which are found on the last rmap (level = 1) when not using + * tdp; such shadow pages are synced with the page table in + * the guest, and the guest page table is using 4K page size + * mapping if the indirect sp has level = 1. */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && @@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, bool flush = false; unsigned long *rmapp; unsigned long last_index, index; - gfn_t gfn_start, gfn_end; spin_lock(&kvm->mmu_lock); - gfn_start = memslot->base_gfn; - gfn_end = memslot->base_gfn + memslot->npages - 1; - - if (gfn_start >= gfn_end) - goto out; - rmapp = memslot->arch.rmap[0]; - last_index = gfn_to_index(gfn_end, memslot->base_gfn, - PT_PAGE_TABLE_LEVEL); + last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1, + memslot->base_gfn, PT_PAGE_TABLE_LEVEL); for (index = 0; index <= last_index; ++index, ++rmapp) { if (*rmapp) @@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, if (flush) kvm_flush_remote_tlbs(kvm); -out: spin_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f5e8dce8046c..f7b61687bd79 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + /* + * Pass through host's Machine Check Enable value to hw_cr4, which + * is in force while we are in guest mode. Do not let guests control + * this bit, even if host CR4.MCE == 0. + */ + unsigned long hw_cr4 = + (cr4_read_shadow() & X86_CR4_MCE) | + (cr4 & ~X86_CR4_MCE) | + (to_vmx(vcpu)->rmode.vm86_active ? + KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); if (cr4 & X86_CR4_VMXE) { /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1a81267f3f6..c73efcd03e29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) &guest_hv_clock, sizeof(guest_hv_clock)))) return 0; - /* - * The interface expects us to write an even number signaling that the - * update is finished. Since the guest won't see the intermediate - * state, we just increase by 2 at the end. + /* This VCPU is paused, but it's legal for a guest to read another + * VCPU's kvmclock, so we really have to follow the specification where + * it says that version is odd if data is being modified, and even after + * it is consistent. + * + * Version field updates must be kept separate. This is because + * kvm_write_guest_cached might use a "rep movs" instruction, and + * writes within a string instruction are weakly ordered. 
So there + * are three writes overall. + * + * As a small optimization, only write the version field in the first + * and third write. The vcpu->pv_time cache is still valid, because the + * version field is the first in the struct. */ - vcpu->hv_clock.version = guest_hv_clock.version + 2; + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + + vcpu->hv_clock.version = guest_hv_clock.version + 1; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); + + smp_wmb(); /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); + + smp_wmb(); + + vcpu->hv_clock.version++; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); return 0; } @@ -5799,7 +5822,6 @@ int kvm_arch_init(void *opaque) kvm_set_mmio_spte_mask(); kvm_x86_ops = ops; - kvm_init_msr_list(); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0); @@ -7253,7 +7275,14 @@ void kvm_arch_hardware_disable(void) int kvm_arch_hardware_setup(void) { - return kvm_x86_ops->hardware_setup(); + int r; + + r = kvm_x86_ops->hardware_setup(); + if (r != 0) + return r; + + kvm_init_msr_list(); + return 0; } void kvm_arch_hardware_unsetup(void) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 717908b16037..8f9a133cc099 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -87,8 +87,7 @@ struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, - .noirq_start = (u32)lguest_noirq_start, - .noirq_end = (u32)lguest_noirq_end, + .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ .syscall_vec = SYSCALL_VECTOR, @@ -262,7 +261,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl); PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable); /*:*/ -/* These are in i386_head.S */ +/* These are in head_32.S */ extern void lg_irq_enable(void); extern void lg_restore_fl(unsigned long flags); @@ -1368,7 +1367,7 @@ static void lguest_restart(char *reason) * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, - * and these are in i386_head.S. + * and these are in head_32.S. */ /*G:060 We construct a table from the assembler templates: */ diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S index 6ddfe4fc23c3..d5ae63f5ec5d 100644 --- a/arch/x86/lguest/head_32.S +++ b/arch/x86/lguest/head_32.S @@ -84,7 +84,7 @@ ENTRY(lg_irq_enable) * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we * jump to send_interrupts, otherwise we're done. */ - testl $0, lguest_data+LGUEST_DATA_irq_pending + cmpl $0, lguest_data+LGUEST_DATA_irq_pending jnz send_interrupts /* * One cool thing about x86 is that you can do many things without using @@ -133,9 +133,8 @@ ENTRY(lg_restore_fl) ret /*:*/ -/* These demark the EIP range where host should never deliver interrupts. */ -.global lguest_noirq_start -.global lguest_noirq_end +/* These demark the EIP where host should never deliver interrupts. 
*/ +.global lguest_noirq_iret /*M:004 * When the Host reflects a trap or injects an interrupt into the Guest, it @@ -168,29 +167,26 @@ ENTRY(lg_restore_fl) * So we have to copy eflags from the stack to lguest_data.irq_enabled before * we do the "iret". * - * There are two problems with this: firstly, we need to use a register to do - * the copy and secondly, the whole thing needs to be atomic. The first - * problem is easy to solve: push %eax on the stack so we can use it, and then - * restore it at the end just before the real "iret". + * There are two problems with this: firstly, we can't clobber any registers + * and secondly, the whole thing needs to be atomic. The first problem + * is solved by using "push memory"/"pop memory" instruction pair for copying. * * The second is harder: copying eflags to lguest_data.irq_enabled will turn * interrupts on before we're finished, so we could be interrupted before we - * return to userspace or wherever. Our solution to this is to surround the - * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the + * return to userspace or wherever. Our solution to this is to tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be - * enabled. + * enabled. (It's not necessary to protect pop instruction, since + * data gets updated only after it completes, so we only need to protect + * one instruction, iret). */ ENTRY(lguest_iret) - pushl %eax - movl 12(%esp), %eax -lguest_noirq_start: + pushl 2*4(%esp) /* * Note the %ss: segment prefix here. Normal data accesses use the * "ds" segment, but that will have already been restored for whatever * we're returning to (such as userspace): we can't trust it. The %ss: * prefix makes sure we use the stack segment, which is still valid. */ - movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled - popl %eax + popl %ss:lguest_data+LGUEST_DATA_irq_enabled +lguest_noirq_iret: iret -lguest_noirq_end: diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 1f33b3d1fd68..0a42327a59d7 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len) clac(); /* If the destination is a kernel buffer, we always clear the end */ - if ((unsigned long)to >= TASK_SIZE_MAX) + if (!__addr_ok(to)) memset(to, 0, len); return len; } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5ead4d6cf3a7..70e7444c6835 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -351,18 +351,20 @@ int arch_ioremap_pmd_supported(void) */ void *xlate_dev_mem_ptr(phys_addr_t phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; + unsigned long start = phys & PAGE_MASK; + unsigned long offset = phys & ~PAGE_MASK; + unsigned long vaddr; /* If page is RAM, we can use __va. Otherwise ioremap and unmap. 
*/ if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE); + /* Only add the offset on success and return NULL if the ioremap() failed: */ + if (vaddr) + vaddr += offset; - return addr; + return (void *)vaddr; } void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 987514396c1e..99f76103c6b7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (is_ereg(dst_reg)) EMIT1(0x41); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); + + /* emit 'movzwl eax, ax' */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: /* emit 'bswap eax' to swap lower 4 bytes */ @@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm32) { + case 16: + /* emit 'movzwl eax, ax' to zero extend 16-bit + * into 64 bit + */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 32: + /* emit 'mov eax, eax' to clear upper 32-bits */ + if (is_ereg(dst_reg)) + EMIT1(0x45); + EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 64: + /* nop */ + break; + } break; /* ST: *(u8*)(dst_reg + off) = imm */ diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index e4695985f9de..d93963340c3c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) kfree(info); } +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. 
+ */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + static void probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, int busnum, int domain, @@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info, "no IO and memory resources present in _CRS\n"); else resource_list_for_each_entry_safe(entry, tmp, list) { - if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || - (entry->res->flags & IORESOURCE_DISABLED)) + if ((entry->res->flags & IORESOURCE_DISABLED) || + resource_is_pcicfg_ioport(entry->res)) resource_list_destroy_entry(entry); else entry->res->name = info->name; diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0a8ee703b9fa..0ce1b1913673 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o -obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c deleted file mode 100644 index 4e720829ab90..000000000000 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * early_printk_intel_mid.c - early consoles for Intel MID platforms - * - * Copyright (c) 2008-2010, Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* - * This file implements early console named hsu. - * hsu is based on a High Speed UART device which only exists in the Medfield - * platform - */ - -#include <linux/serial_reg.h> -#include <linux/serial_mfd.h> -#include <linux/console.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/io.h> - -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/intel-mid.h> - -/* - * Following is the early console based on Medfield HSU (High - * Speed UART) device. - */ -#define HSU_PORT_BASE 0xffa28080 - -static void __iomem *phsu; - -void hsu_early_console_init(const char *s) -{ - unsigned long paddr, port = 0; - u8 lcr; - - /* - * Select the early HSU console port if specified by user in the - * kernel command line. 
- */ - if (*s && !kstrtoul(s, 10, &port)) - port = clamp_val(port, 0, 2); - - paddr = HSU_PORT_BASE + port * 0x80; - phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); - - /* Disable FIFO */ - writeb(0x0, phsu + UART_FCR); - - /* Set to default 115200 bps, 8n1 */ - lcr = readb(phsu + UART_LCR); - writeb((0x80 | lcr), phsu + UART_LCR); - writeb(0x18, phsu + UART_DLL); - writeb(lcr, phsu + UART_LCR); - writel(0x3600, phsu + UART_MUL*4); - - writeb(0x8, phsu + UART_MCR); - writeb(0x7, phsu + UART_FCR); - writeb(0x3, phsu + UART_LCR); - - /* Clear IRQ status */ - readb(phsu + UART_LSR); - readb(phsu + UART_RX); - readb(phsu + UART_IIR); - readb(phsu + UART_MSR); - - /* Enable FIFO */ - writeb(0x7, phsu + UART_FCR); -} - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -static void early_hsu_putc(char ch) -{ - unsigned int timeout = 10000; /* 10ms */ - u8 status; - - while (--timeout) { - status = readb(phsu + UART_LSR); - if (status & BOTH_EMPTY) - break; - udelay(1); - } - - /* Only write the char when there was no timeout */ - if (timeout) - writeb(ch, phsu + UART_TX); -} - -static void early_hsu_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_hsu_putc('\r'); - early_hsu_putc(*str); - str++; - } -} - -struct console early_hsu_console = { - .name = "earlyhsu", - .write = early_hsu_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 275a3a8b78af..e97032069f88 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 40d2473836c9..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. + * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - /* Make sure migrate_count will change if we leave the VCPU. */ - do { - pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - - cpu1 = cpu; - cpu = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1)); + pvti = get_pvti(cpu); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * - We must read TSC of pvti's VCPU. - * - KVM doesn't follow the versioning protocol, so data could - * change before version if we left the VCPU. 
+ * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. */ - smp_rmb(); - } while (unlikely((pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + cpu1 = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94578efd3067..46957ead3060 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = { static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); @@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); } +#endif static bool xen_nopv = false; static __init int xen_parse_nopv(char *arg) @@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); -static uint32_t __init xen_hvm_platform(void) +static uint32_t __init xen_platform(void) { if (xen_nopv) return 0; - if (xen_pv_domain()) - return 0; - return xen_cpuid_base(); } @@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, +#endif .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); -#endif +EXPORT_SYMBOL(x86_hyper_xen); diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9497698645a..53b4c0811f4f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data) tick_resume_local(); } +static void xen_vcpu_notify_suspend(void *data) +{ + tick_suspend_local(); +} + void xen_arch_resume(void) { on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } + +void xen_arch_suspend(void) +{ + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); +} |
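
The kvm_guest_time_update() hunk in arch/x86/kvm/x86.c above describes a seqlock-style handshake: the host makes the kvmclock version field odd while the payload is being rewritten and even again once it is consistent, so a reader must retry whenever it observes an odd version or a version that changed across its read of the payload. Below is a minimal, self-contained sketch of that reader/writer protocol, assuming nothing beyond what the comment in the diff states; the names (shared_clock, clock_write, clock_read) are invented for illustration and are not the kernel's pvclock types.

```c
/*
 * Simplified sketch of the even/odd version protocol used by kvmclock.
 * Writer: bump version to odd, update payload, bump version to even.
 * Reader: retry while version is odd or changed during the read.
 */
#include <stdint.h>
#include <stdio.h>

struct shared_clock {
	volatile uint32_t version;	/* odd = update in progress */
	volatile uint64_t system_time;
	volatile uint64_t tsc_timestamp;
};

static void clock_write(struct shared_clock *c, uint64_t st, uint64_t tsc)
{
	c->version++;			/* now odd: readers must retry    */
	__sync_synchronize();		/* order version before payload   */
	c->system_time = st;
	c->tsc_timestamp = tsc;
	__sync_synchronize();		/* order payload before version   */
	c->version++;			/* now even: payload is consistent */
}

static uint64_t clock_read(const struct shared_clock *c)
{
	uint32_t v;
	uint64_t st;

	do {
		v = c->version;
		__sync_synchronize();
		st = c->system_time;
		__sync_synchronize();
	} while ((v & 1) || v != c->version);	/* odd or changed: retry */

	return st;
}

int main(void)
{
	struct shared_clock c = { 0 };

	clock_write(&c, 1000, 42);
	printf("system_time=%llu\n", (unsigned long long)clock_read(&c));
	return 0;
}
```

The same reasoning explains why the patch splits the guest copy into three writes (version, payload, version) with barriers between them: kvm_write_guest_cached may use a string move whose stores are weakly ordered, so the version field must be published separately on each side of the payload update.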