diff options
Diffstat (limited to 'arch/x86')
37 files changed, 648 insertions, 297 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ccfededfe470..4a10ba9e95da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,6 +24,7 @@ config X86 select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL @@ -82,6 +83,8 @@ config X86 select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_ARCH_KGDB select HAVE_ARCH_KMEMCHECK + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY if X86_64 select HAVE_ARCH_TRACEHOOK @@ -177,12 +180,23 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 28 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 32 if 64BIT + default 16 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config SBUS bool diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 110253ce83af..9b18ed97a8a2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,23 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - ---help--- - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. Note that with PAT support - enabled, even in this case there are restrictions on /dev/mem - use due to the cache aliasing requirements. - - If this option is switched on, the /dev/mem file only allows - userspace access to PCI space and the BIOS code and data regions. - This is sufficient for dosemu and X and all common users of - /dev/mem. - - If in doubt, say Y. - config X86_VERBOSE_BOOTUP bool "Enable verbose x86 bootup info messages" default y diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 8602f06c759f..1a50e09c945b 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -126,23 +126,23 @@ static notrace cycle_t vread_pvclock(int *mode) * * On Xen, we don't appear to have that guarantee, but Xen still * supplies a valid seqlock using the version field. - + * * We only do pvclock vdso timing at all if * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to * mean that all vCPUs have matching pvti and that the TSC is * synced, so we can just look at vCPU 0's pvti. */ - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { - *mode = VCLOCK_NONE; - return 0; - } - do { version = pvti->version; smp_rmb(); + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { + *mode = VCLOCK_NONE; + return 0; + } + tsc = rdtsc_ordered(); pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; pvti_tsc_shift = pvti->tsc_shift; diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 0681d2532527..a584e1c50918 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -31,20 +31,10 @@ #endif #define dma_wmb() barrier() -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() dma_rmb() -#define smp_wmb() barrier() -#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else /* !SMP */ -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) -#endif /* SMP */ - -#define read_barrier_depends() do { } while (0) -#define smp_read_barrier_depends() do { } while (0) +#define __smp_mb() mb() +#define __smp_rmb() dma_rmb() +#define __smp_wmb() barrier() +#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #if defined(CONFIG_X86_PPRO_FENCE) @@ -53,31 +43,31 @@ * model and we should fall back to full barriers. */ -#define smp_store_release(p, v) \ +#define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - smp_mb(); \ + __smp_mb(); \ WRITE_ONCE(*p, v); \ } while (0) -#define smp_load_acquire(p) \ +#define __smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - smp_mb(); \ + __smp_mb(); \ ___p1; \ }) #else /* regular x86 TSO memory ordering */ -#define smp_store_release(p, v) \ +#define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ WRITE_ONCE(*p, v); \ } while (0) -#define smp_load_acquire(p) \ +#define __smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ @@ -88,7 +78,9 @@ do { \ #endif /* Atomic operations are already serializing on x86 */ -#define smp_mb__before_atomic() barrier() -#define smp_mb__after_atomic() barrier() +#define __smp_mb__before_atomic() barrier() +#define __smp_mb__after_atomic() barrier() + +#include <asm-generic/barrier.h> #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4fa687a47a62..6b8d6e8cd449 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -27,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#define BOOT_HEAP_SIZE 0x8000 +#define BOOT_HEAP_SIZE 0x10000 #endif /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index eadcdd5bb946..0fd440df63f1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -42,6 +42,7 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); +extern u64 fpu__get_supported_xfeatures_mask(void); /* * Debugging facility: diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 3a6c89b70307..af30fdeb140d 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -20,15 +20,16 @@ /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ + XFEATURE_MASK_SSE) + +/* Supported features which require eager state saving */ +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM) -/* Supported features which require eager state saving */ -#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) - /* All currently supported features */ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h new file mode 100644 index 000000000000..201eb9dce595 --- /dev/null +++ b/arch/x86/include/asm/intel_punit_ipc.h @@ -0,0 +1,101 @@ +#ifndef _ASM_X86_INTEL_PUNIT_IPC_H_ +#define _ASM_X86_INTEL_PUNIT_IPC_H_ + +/* + * Three types of 8bit P-Unit IPC commands are supported, + * bit[7:6]: [00]: BIOS; [01]: GTD; [10]: ISPD. + */ +typedef enum { + BIOS_IPC = 0, + GTDRIVER_IPC, + ISPDRIVER_IPC, + RESERVED_IPC, +} IPC_TYPE; + +#define IPC_TYPE_OFFSET 6 +#define IPC_PUNIT_BIOS_CMD_BASE (BIOS_IPC << IPC_TYPE_OFFSET) +#define IPC_PUNIT_GTD_CMD_BASE (GTDDRIVER_IPC << IPC_TYPE_OFFSET) +#define IPC_PUNIT_ISPD_CMD_BASE (ISPDRIVER_IPC << IPC_TYPE_OFFSET) +#define IPC_PUNIT_CMD_TYPE_MASK (RESERVED_IPC << IPC_TYPE_OFFSET) + +/* BIOS => Pcode commands */ +#define IPC_PUNIT_BIOS_ZERO (IPC_PUNIT_BIOS_CMD_BASE | 0x00) +#define IPC_PUNIT_BIOS_VR_INTERFACE (IPC_PUNIT_BIOS_CMD_BASE | 0x01) +#define IPC_PUNIT_BIOS_READ_PCS (IPC_PUNIT_BIOS_CMD_BASE | 0x02) +#define IPC_PUNIT_BIOS_WRITE_PCS (IPC_PUNIT_BIOS_CMD_BASE | 0x03) +#define IPC_PUNIT_BIOS_READ_PCU_CONFIG (IPC_PUNIT_BIOS_CMD_BASE | 0x04) +#define IPC_PUNIT_BIOS_WRITE_PCU_CONFIG (IPC_PUNIT_BIOS_CMD_BASE | 0x05) +#define IPC_PUNIT_BIOS_READ_PL1_SETTING (IPC_PUNIT_BIOS_CMD_BASE | 0x06) +#define IPC_PUNIT_BIOS_WRITE_PL1_SETTING (IPC_PUNIT_BIOS_CMD_BASE | 0x07) +#define IPC_PUNIT_BIOS_TRIGGER_VDD_RAM (IPC_PUNIT_BIOS_CMD_BASE | 0x08) +#define IPC_PUNIT_BIOS_READ_TELE_INFO (IPC_PUNIT_BIOS_CMD_BASE | 0x09) +#define IPC_PUNIT_BIOS_READ_TELE_TRACE_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0a) +#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0b) +#define IPC_PUNIT_BIOS_READ_TELE_EVENT_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0c) +#define IPC_PUNIT_BIOS_WRITE_TELE_EVENT_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0d) +#define IPC_PUNIT_BIOS_READ_TELE_TRACE (IPC_PUNIT_BIOS_CMD_BASE | 0x0e) +#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE (IPC_PUNIT_BIOS_CMD_BASE | 0x0f) +#define IPC_PUNIT_BIOS_READ_TELE_EVENT (IPC_PUNIT_BIOS_CMD_BASE | 0x10) +#define IPC_PUNIT_BIOS_WRITE_TELE_EVENT (IPC_PUNIT_BIOS_CMD_BASE | 0x11) +#define IPC_PUNIT_BIOS_READ_MODULE_TEMP (IPC_PUNIT_BIOS_CMD_BASE | 0x12) +#define IPC_PUNIT_BIOS_RESERVED (IPC_PUNIT_BIOS_CMD_BASE | 0x13) +#define IPC_PUNIT_BIOS_READ_VOLTAGE_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x14) +#define IPC_PUNIT_BIOS_WRITE_VOLTAGE_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x15) +#define IPC_PUNIT_BIOS_READ_RATIO_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x16) +#define IPC_PUNIT_BIOS_WRITE_RATIO_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x17) +#define IPC_PUNIT_BIOS_READ_VF_GL_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x18) +#define IPC_PUNIT_BIOS_WRITE_VF_GL_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x19) +#define IPC_PUNIT_BIOS_READ_FM_SOC_TEMP_THRESH (IPC_PUNIT_BIOS_CMD_BASE | 0x1a) +#define IPC_PUNIT_BIOS_WRITE_FM_SOC_TEMP_THRESH (IPC_PUNIT_BIOS_CMD_BASE | 0x1b) + +/* GT Driver => Pcode commands */ +#define IPC_PUNIT_GTD_ZERO (IPC_PUNIT_GTD_CMD_BASE | 0x00) +#define IPC_PUNIT_GTD_CONFIG (IPC_PUNIT_GTD_CMD_BASE | 0x01) +#define IPC_PUNIT_GTD_READ_ICCP_LIC_CDYN_SCAL (IPC_PUNIT_GTD_CMD_BASE | 0x02) +#define IPC_PUNIT_GTD_WRITE_ICCP_LIC_CDYN_SCAL (IPC_PUNIT_GTD_CMD_BASE | 0x03) +#define IPC_PUNIT_GTD_GET_WM_VAL (IPC_PUNIT_GTD_CMD_BASE | 0x06) +#define IPC_PUNIT_GTD_WRITE_CONFIG_WISHREQ (IPC_PUNIT_GTD_CMD_BASE | 0x07) +#define IPC_PUNIT_GTD_READ_REQ_DUTY_CYCLE (IPC_PUNIT_GTD_CMD_BASE | 0x16) +#define IPC_PUNIT_GTD_DIS_VOL_FREQ_CHG_REQUEST (IPC_PUNIT_GTD_CMD_BASE | 0x17) +#define IPC_PUNIT_GTD_DYNA_DUTY_CYCLE_CTRL (IPC_PUNIT_GTD_CMD_BASE | 0x1a) +#define IPC_PUNIT_GTD_DYNA_DUTY_CYCLE_TUNING (IPC_PUNIT_GTD_CMD_BASE | 0x1c) + +/* ISP Driver => Pcode commands */ +#define IPC_PUNIT_ISPD_ZERO (IPC_PUNIT_ISPD_CMD_BASE | 0x00) +#define IPC_PUNIT_ISPD_CONFIG (IPC_PUNIT_ISPD_CMD_BASE | 0x01) +#define IPC_PUNIT_ISPD_GET_ISP_LTR_VAL (IPC_PUNIT_ISPD_CMD_BASE | 0x02) +#define IPC_PUNIT_ISPD_ACCESS_IU_FREQ_BOUNDS (IPC_PUNIT_ISPD_CMD_BASE | 0x03) +#define IPC_PUNIT_ISPD_READ_CDYN_LEVEL (IPC_PUNIT_ISPD_CMD_BASE | 0x04) +#define IPC_PUNIT_ISPD_WRITE_CDYN_LEVEL (IPC_PUNIT_ISPD_CMD_BASE | 0x05) + +/* Error codes */ +#define IPC_PUNIT_ERR_SUCCESS 0 +#define IPC_PUNIT_ERR_INVALID_CMD 1 +#define IPC_PUNIT_ERR_INVALID_PARAMETER 2 +#define IPC_PUNIT_ERR_CMD_TIMEOUT 3 +#define IPC_PUNIT_ERR_CMD_LOCKED 4 +#define IPC_PUNIT_ERR_INVALID_VR_ID 5 +#define IPC_PUNIT_ERR_VR_ERR 6 + +#if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC) + +int intel_punit_ipc_simple_command(int cmd, int para1, int para2); +int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out); + +#else + +static inline int intel_punit_ipc_simple_command(int cmd, + int para1, int para2) +{ + return -ENODEV; +} + +static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, + u32 *in, u32 *out) +{ + return -ENODEV; +} + +#endif /* CONFIG_INTEL_PUNIT_IPC */ + +#endif diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h new file mode 100644 index 000000000000..ed65fe701de5 --- /dev/null +++ b/arch/x86/include/asm/intel_telemetry.h @@ -0,0 +1,147 @@ +/* + * Intel SOC Telemetry Driver Header File + * Copyright (C) 2015, Intel Corporation. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#ifndef INTEL_TELEMETRY_H +#define INTEL_TELEMETRY_H + +#define TELEM_MAX_EVENTS_SRAM 28 +#define TELEM_MAX_OS_ALLOCATED_EVENTS 20 + +enum telemetry_unit { + TELEM_PSS = 0, + TELEM_IOSS, + TELEM_UNIT_NONE +}; + +struct telemetry_evtlog { + u32 telem_evtid; + u64 telem_evtlog; +}; + +struct telemetry_evtconfig { + /* Array of Event-IDs to Enable */ + u32 *evtmap; + + /* Number of Events (<29) in evtmap */ + u8 num_evts; + + /* Sampling period */ + u8 period; +}; + +struct telemetry_evtmap { + const char *name; + u32 evt_id; +}; + +struct telemetry_unit_config { + struct telemetry_evtmap *telem_evts; + void __iomem *regmap; + u32 ssram_base_addr; + u8 ssram_evts_used; + u8 curr_period; + u8 max_period; + u8 min_period; + u32 ssram_size; + +}; + +struct telemetry_plt_config { + struct telemetry_unit_config pss_config; + struct telemetry_unit_config ioss_config; + struct mutex telem_trace_lock; + struct mutex telem_lock; + bool telem_in_use; +}; + +struct telemetry_core_ops { + int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period, + u8 *ioss_min_period, u8 *ioss_max_period); + + int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig, + struct telemetry_evtconfig *ioss_evtconfig, + int pss_len, int ioss_len); + + int (*update_events)(struct telemetry_evtconfig pss_evtconfig, + struct telemetry_evtconfig ioss_evtconfig); + + int (*set_sampling_period)(u8 pss_period, u8 ioss_period); + + int (*get_trace_verbosity)(enum telemetry_unit telem_unit, + u32 *verbosity); + + int (*set_trace_verbosity)(enum telemetry_unit telem_unit, + u32 verbosity); + + int (*raw_read_eventlog)(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, + int len, int log_all_evts); + + int (*read_eventlog)(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, + int len, int log_all_evts); + + int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts, + u32 *pss_evtmap, u32 *ioss_evtmap); + + int (*reset_events)(void); +}; + +int telemetry_set_pltdata(struct telemetry_core_ops *ops, + struct telemetry_plt_config *pltconfig); + +int telemetry_clear_pltdata(void); + +int telemetry_pltconfig_valid(void); + +int telemetry_get_evtname(enum telemetry_unit telem_unit, + const char **name, int len); + +int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig, + struct telemetry_evtconfig ioss_evtconfig); + +int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts, + u32 *pss_evtmap, u32 *ioss_evtmap); + +int telemetry_reset_events(void); + +int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config, + struct telemetry_evtconfig *ioss_config, + int pss_len, int ioss_len); + +int telemetry_read_events(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_raw_read_events(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_read_eventlog(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period, + u8 *ioss_min_period, u8 *ioss_max_period); + +int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period); + +int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit, + u32 verbosity); + +int telemetry_get_trace_verbosity(enum telemetry_unit telem_unit, + u32 *verbosity); + +#endif /* INTEL_TELEMETRY_H */ diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 3bbc07a57a31..73d0c9b92087 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -12,7 +12,9 @@ #define GUEST_PL 1 /* Page for Switcher text itself, then two pages per cpu */ -#define TOTAL_SWITCHER_PAGES (1 + 2 * nr_cpu_ids) +#define SWITCHER_TEXT_PAGES (1) +#define SWITCHER_STACK_PAGES (2 * nr_cpu_ids) +#define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES) /* Where we map the Switcher, in both Host and Guest. */ extern unsigned long switcher_addr; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 379cd3658799..bfd9b2a35a0b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + * + */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ @@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d3eee663c41f..0687c4748b8f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -162,20 +162,22 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return pmd_val(pmd) & _PAGE_SPLITTING; -} - static inline int pmd_trans_huge(pmd_t pmd) { - return pmd_val(pmd) & _PAGE_PSE; + return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } static inline int has_transparent_hugepage(void) { return cpu_has_pse; } + +#ifdef __HAVE_ARCH_PTE_DEVMAP +static inline int pmd_devmap(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_DEVMAP); +} +#endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) @@ -252,6 +254,11 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL); } +static inline pte_t pte_mkdevmap(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); +} + static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); @@ -271,6 +278,11 @@ static inline pmd_t pmd_mkold(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_ACCESSED); } +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_DIRTY); +} + static inline pmd_t pmd_wrprotect(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_RW); @@ -281,6 +293,11 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } +static inline pmd_t pmd_mkdevmap(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_DEVMAP); +} + static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_PSE); @@ -462,6 +479,13 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } +#ifdef __HAVE_ARCH_PTE_DEVMAP +static inline int pte_devmap(pte_t a) +{ + return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; +} +#endif + #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { @@ -808,10 +832,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index a471cadb9630..04c27a013165 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -22,10 +22,11 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 -#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ -#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ @@ -46,7 +47,6 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) -#define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING) #define __HAVE_ARCH_PTE_SPECIAL #ifdef CONFIG_KMEMCHECK @@ -85,8 +85,11 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) +#define __HAVE_ARCH_PTE_DEVMAP #else #define _PAGE_NX (_AT(pteval_t, 0)) +#define _PAGE_DEVMAP (_AT(pteval_t, 0)) #endif #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 7249e6d0902d..5973a2f3db3d 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -55,6 +55,7 @@ enum sst_audio_device_id_mrfld { PIPE_MEDIA0_IN = 0x8F, PIPE_MEDIA1_IN = 0x90, PIPE_MEDIA2_IN = 0x91, + PIPE_MEDIA3_IN = 0x9C, PIPE_RSVD = 0xFF, }; diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index d8ce3ec816ab..1544fabcd7f9 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -132,12 +132,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) { void *vaddr = (void __force *)addr; - /* TODO: implement the zeroing via non-temporal writes */ - if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - clear_page(vaddr); - else - memset(vaddr, 0, size); - + memset(vaddr, 0, size); __arch_wb_cache_pmem(vaddr, size); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e678ddeed030..a07956a08936 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -434,8 +434,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) */ int ht_nodeid = c->initial_apicid; - if (ht_nodeid >= 0 && - __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = __apicid_to_node[ht_nodeid]; /* Pick a nearby node */ if (!node_online(node)) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index db9a675e751b..bca14c899137 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -547,6 +547,7 @@ static const struct pci_device_id intel_stolen_ids[] __initconst = { INTEL_CHV_IDS(&chv_stolen_funcs), INTEL_SKL_IDS(&gen9_stolen_funcs), INTEL_BXT_IDS(&gen9_stolen_funcs), + INTEL_KBL_IDS(&gen9_stolen_funcs), }; static void __init intel_graphics_stolen(int num, int slot, int func) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7b2978ab30df..6d9f0a7ef4c8 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -3,8 +3,11 @@ */ #include <asm/fpu/internal.h> #include <asm/tlbflush.h> +#include <asm/setup.h> +#include <asm/cmdline.h> #include <linux/sched.h> +#include <linux/init.h> /* * Initialize the TS bit in CR0 according to the style of context-switches @@ -270,20 +273,52 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -static int __init eager_fpu_setup(char *s) +/* + * Find supported xfeatures based on cpu features and command-line input. + * This must be called after fpu__init_parse_early_param() is called and + * xfeatures_mask is enumerated. + */ +u64 __init fpu__get_supported_xfeatures_mask(void) { - if (!strcmp(s, "on")) - eagerfpu = ENABLE; - else if (!strcmp(s, "off")) - eagerfpu = DISABLE; - else if (!strcmp(s, "auto")) - eagerfpu = AUTO; - return 1; + /* Support all xfeatures known to us */ + if (eagerfpu != DISABLE) + return XCNTXT_MASK; + + /* Warning of xfeatures being disabled for no eagerfpu mode */ + if (xfeatures_mask & XFEATURE_MASK_EAGER) { + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + xfeatures_mask & XFEATURE_MASK_EAGER); + } + + /* Return a mask that masks out all features requiring eagerfpu mode */ + return ~XFEATURE_MASK_EAGER; +} + +/* + * Disable features dependent on eagerfpu. + */ +static void __init fpu__clear_eager_fpu_features(void) +{ + setup_clear_cpu_cap(X86_FEATURE_MPX); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); + setup_clear_cpu_cap(X86_FEATURE_AVX512F); + setup_clear_cpu_cap(X86_FEATURE_AVX512PF); + setup_clear_cpu_cap(X86_FEATURE_AVX512ER); + setup_clear_cpu_cap(X86_FEATURE_AVX512CD); } -__setup("eagerfpu=", eager_fpu_setup); /* * Pick the FPU context switching strategy: + * + * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of + * the following is true: + * + * (1) the cpu has xsaveopt, as it has the optimization and doing eager + * FPU switching has a relatively low cost compared to a plain xsave; + * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU + * switching. Should the kernel boot with noxsaveopt, we support MPX + * with eager FPU switching at a higher cost. */ static void __init fpu__init_system_ctx_switch(void) { @@ -295,19 +330,11 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; - /* Auto enable eagerfpu for xsaveopt */ if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) eagerfpu = ENABLE; - if (xfeatures_mask & XFEATURE_MASK_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", - xfeatures_mask & XFEATURE_MASK_EAGER); - xfeatures_mask &= ~XFEATURE_MASK_EAGER; - } else { - eagerfpu = ENABLE; - } - } + if (xfeatures_mask & XFEATURE_MASK_EAGER) + eagerfpu = ENABLE; if (eagerfpu == ENABLE) setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); @@ -316,11 +343,48 @@ static void __init fpu__init_system_ctx_switch(void) } /* + * We parse fpu parameters early because fpu__init_system() is executed + * before parse_early_param(). + */ +static void __init fpu__init_parse_early_param(void) +{ + /* + * No need to check "eagerfpu=auto" again, since it is the + * initial default. + */ + if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { + eagerfpu = DISABLE; + fpu__clear_eager_fpu_features(); + } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { + eagerfpu = ENABLE; + } + + if (cmdline_find_option_bool(boot_command_line, "no387")) + setup_clear_cpu_cap(X86_FEATURE_FPU); + + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) { + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); + setup_clear_cpu_cap(X86_FEATURE_XMM); + } + + if (cmdline_find_option_bool(boot_command_line, "noxsave")) + fpu__xstate_clear_all_cpu_caps(); + + if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + if (cmdline_find_option_bool(boot_command_line, "noxsaves")) + setup_clear_cpu_cap(X86_FEATURE_XSAVES); +} + +/* * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ void __init fpu__init_system(struct cpuinfo_x86 *c) { + fpu__init_parse_early_param(); fpu__init_system_early_generic(c); /* @@ -344,62 +408,3 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_ctx_switch(); } - -/* - * Boot parameter to turn off FPU support and fall back to math-emu: - */ -static int __init no_387(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FPU); - return 1; -} -__setup("no387", no_387); - -/* - * Disable all xstate CPU features: - */ -static int __init x86_noxsave_setup(char *s) -{ - if (strlen(s)) - return 0; - - fpu__xstate_clear_all_cpu_caps(); - - return 1; -} -__setup("noxsave", x86_noxsave_setup); - -/* - * Disable the XSAVEOPT instruction specifically: - */ -static int __init x86_noxsaveopt_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - - return 1; -} -__setup("noxsaveopt", x86_noxsaveopt_setup); - -/* - * Disable the XSAVES instruction: - */ -static int __init x86_noxsaves_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - - return 1; -} -__setup("noxsaves", x86_noxsaves_setup); - -/* - * Disable FX save/restore and SSE support: - */ -static int __init x86_nofxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); - setup_clear_cpu_cap(X86_FEATURE_XMM); - - return 1; -} -__setup("nofxsr", x86_nofxsr_setup); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 40f100285984..d425cda5ae6d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -52,6 +52,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); setup_clear_cpu_cap(X86_FEATURE_MPX); + setup_clear_cpu_cap(X86_FEATURE_XGETBV1); } /* @@ -632,8 +633,7 @@ void __init fpu__init_system_xstate(void) BUG(); } - /* Support only the state known to the OS: */ - xfeatures_mask = xfeatures_mask & XCNTXT_MASK; + xfeatures_mask &= fpu__get_supported_xfeatures_mask(); /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c index d1d35ccffed3..92fc1a51f994 100644 --- a/arch/x86/kernel/livepatch.c +++ b/arch/x86/kernel/livepatch.c @@ -20,8 +20,6 @@ #include <linux/module.h> #include <linux/uaccess.h> -#include <asm/cacheflush.h> -#include <asm/page_types.h> #include <asm/elf.h> #include <asm/livepatch.h> @@ -38,11 +36,10 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, unsigned long loc, unsigned long value) { - int ret, numpages, size = 4; - bool readonly; + size_t size = 4; unsigned long val; - unsigned long core = (unsigned long)mod->module_core; - unsigned long core_size = mod->core_size; + unsigned long core = (unsigned long)mod->core_layout.base; + unsigned long core_size = mod->core_layout.size; switch (type) { case R_X86_64_NONE: @@ -69,23 +66,5 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, /* loc does not point to any symbol inside the module */ return -EINVAL; - readonly = false; - -#ifdef CONFIG_DEBUG_SET_MODULE_RONX - if (loc < core + mod->core_ro_size) - readonly = true; -#endif - - /* determine if the relocation spans a page boundary */ - numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2; - - if (readonly) - set_memory_rw(loc & PAGE_MASK, numpages); - - ret = probe_kernel_write((void *)loc, &val, size); - - if (readonly) - set_memory_ro(loc & PAGE_MASK, numpages); - - return ret; + return probe_kernel_write((void *)loc, &val, size); } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d64889aa2d46..ab0adc0fa5db 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, + { /* Handle problems with rebooting on the iMac10,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac10,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"), + }, + }, /* ASRock */ { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 4cf401f581e7..07efb35ee4bc 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -48,31 +48,31 @@ verify_cpu: pushfl popl %eax cmpl %eax,%ebx - jz verify_cpu_no_longmode # cpu has no cpuid + jz .Lverify_cpu_no_longmode # cpu has no cpuid #endif movl $0x0,%eax # See if cpuid 1 is implemented cpuid cmpl $0x1,%eax - jb verify_cpu_no_longmode # no cpuid 1 + jb .Lverify_cpu_no_longmode # no cpuid 1 xor %di,%di cmpl $0x68747541,%ebx # AuthenticAMD - jnz verify_cpu_noamd + jnz .Lverify_cpu_noamd cmpl $0x69746e65,%edx - jnz verify_cpu_noamd + jnz .Lverify_cpu_noamd cmpl $0x444d4163,%ecx - jnz verify_cpu_noamd + jnz .Lverify_cpu_noamd mov $1,%di # cpu is from AMD - jmp verify_cpu_check + jmp .Lverify_cpu_check -verify_cpu_noamd: +.Lverify_cpu_noamd: cmpl $0x756e6547,%ebx # GenuineIntel? - jnz verify_cpu_check + jnz .Lverify_cpu_check cmpl $0x49656e69,%edx - jnz verify_cpu_check + jnz .Lverify_cpu_check cmpl $0x6c65746e,%ecx - jnz verify_cpu_check + jnz .Lverify_cpu_check # only call IA32_MISC_ENABLE when: # family > 6 || (family == 6 && model >= 0xd) @@ -83,59 +83,59 @@ verify_cpu_noamd: andl $0x0ff00f00, %eax # mask family and extended family shrl $8, %eax cmpl $6, %eax - ja verify_cpu_clear_xd # family > 6, ok - jb verify_cpu_check # family < 6, skip + ja .Lverify_cpu_clear_xd # family > 6, ok + jb .Lverify_cpu_check # family < 6, skip andl $0x000f00f0, %ecx # mask model and extended model shrl $4, %ecx cmpl $0xd, %ecx - jb verify_cpu_check # family == 6, model < 0xd, skip + jb .Lverify_cpu_check # family == 6, model < 0xd, skip -verify_cpu_clear_xd: +.Lverify_cpu_clear_xd: movl $MSR_IA32_MISC_ENABLE, %ecx rdmsr btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE - jnc verify_cpu_check # only write MSR if bit was changed + jnc .Lverify_cpu_check # only write MSR if bit was changed wrmsr -verify_cpu_check: +.Lverify_cpu_check: movl $0x1,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK0,%edx xorl $REQUIRED_MASK0,%edx - jnz verify_cpu_no_longmode + jnz .Lverify_cpu_no_longmode movl $0x80000000,%eax # See if extended cpuid is implemented cpuid cmpl $0x80000001,%eax - jb verify_cpu_no_longmode # no extended cpuid + jb .Lverify_cpu_no_longmode # no extended cpuid movl $0x80000001,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK1,%edx xorl $REQUIRED_MASK1,%edx - jnz verify_cpu_no_longmode + jnz .Lverify_cpu_no_longmode -verify_cpu_sse_test: +.Lverify_cpu_sse_test: movl $1,%eax cpuid andl $SSE_MASK,%edx cmpl $SSE_MASK,%edx - je verify_cpu_sse_ok + je .Lverify_cpu_sse_ok test %di,%di - jz verify_cpu_no_longmode # only try to force SSE on AMD + jz .Lverify_cpu_no_longmode # only try to force SSE on AMD movl $MSR_K7_HWCR,%ecx rdmsr btr $15,%eax # enable SSE wrmsr xor %di,%di # don't loop - jmp verify_cpu_sse_test # try again + jmp .Lverify_cpu_sse_test # try again -verify_cpu_no_longmode: +.Lverify_cpu_no_longmode: popf # Restore caller passed flags movl $1,%eax ret -verify_cpu_sse_ok: +.Lverify_cpu_sse_ok: popf # Restore caller passed flags xorl %eax, %eax ret diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 483231ebbb0b..e574b8546518 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -175,7 +175,11 @@ static void mark_screen_rdonly(struct mm_struct *mm) if (pud_none_or_clear_bad(pud)) goto out; pmd = pmd_offset(pud, 0xA0000); - split_huge_page_pmd_mm(mm, 0xA0000, pmd); + + if (pmd_trans_huge(*pmd)) { + struct vm_area_struct *vma = find_vma(mm, 0xA0000); + split_huge_pmd(vma, pmd, 0xA0000); + } if (pmd_none_or_clear_bad(pmd)) goto out; pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 5c520ebf6343..a22a488b4622 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -43,11 +43,11 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, +static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, unsigned long npages) { gfn_t end_gfn; - pfn_t pfn; + kvm_pfn_t pfn; pfn = gfn_to_pfn_memslot(slot, gfn); end_gfn = gfn + npages; @@ -62,7 +62,8 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, return pfn; } -static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long npages) { unsigned long i; @@ -73,7 +74,7 @@ static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { gfn_t gfn, end_gfn; - pfn_t pfn; + kvm_pfn_t pfn; int r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; int flags; @@ -275,7 +276,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm, { struct iommu_domain *domain; gfn_t end_gfn, gfn; - pfn_t pfn; + kvm_pfn_t pfn; u64 phys; domain = kvm->arch.iommu_domain; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 420a5ca3c0ee..95a955de5964 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -259,7 +259,7 @@ static unsigned get_mmio_spte_access(u64 spte) } static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - pfn_t pfn, unsigned access) + kvm_pfn_t pfn, unsigned access) { if (unlikely(is_noslot_pfn(pfn))) { mark_mmio_spte(vcpu, sptep, gfn, access); @@ -320,7 +320,7 @@ static int is_last_spte(u64 pte, int level) return 0; } -static pfn_t spte_to_pfn(u64 pte) +static kvm_pfn_t spte_to_pfn(u64 pte) { return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; } @@ -582,7 +582,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) */ static int mmu_spte_clear_track_bits(u64 *sptep) { - pfn_t pfn; + kvm_pfn_t pfn; u64 old_spte = *sptep; if (!spte_has_volatile_bits(old_spte)) @@ -1372,7 +1372,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, int need_flush = 0; u64 new_spte; pte_t *ptep = (pte_t *)data; - pfn_t new_pfn; + kvm_pfn_t new_pfn; WARN_ON(pte_huge(*ptep)); new_pfn = pte_pfn(*ptep); @@ -2450,7 +2450,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, return 0; } -static bool kvm_is_mmio_pfn(pfn_t pfn) +static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); @@ -2460,7 +2460,7 @@ static bool kvm_is_mmio_pfn(pfn_t pfn) static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, - gfn_t gfn, pfn_t pfn, bool speculative, + gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { u64 spte; @@ -2539,7 +2539,7 @@ done: } static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, - int write_fault, int level, gfn_t gfn, pfn_t pfn, + int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool host_writable) { int was_rmapped = 0; @@ -2602,7 +2602,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, return emulate; } -static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, +static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { struct kvm_memory_slot *slot; @@ -2684,7 +2684,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) } static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, - int level, gfn_t gfn, pfn_t pfn, bool prefault) + int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -2732,7 +2732,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * send_sig_info(SIGBUS, &info, tsk); } -static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) +static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) { /* * Do not cache the mmio info caused by writing the readonly gfn @@ -2752,9 +2752,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t *gfnp, pfn_t *pfnp, int *levelp) + gfn_t *gfnp, kvm_pfn_t *pfnp, + int *levelp) { - pfn_t pfn = *pfnp; + kvm_pfn_t pfn = *pfnp; gfn_t gfn = *gfnp; int level = *levelp; @@ -2793,7 +2794,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, } static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, - pfn_t pfn, unsigned access, int *ret_val) + kvm_pfn_t pfn, unsigned access, int *ret_val) { bool ret = true; @@ -2947,7 +2948,7 @@ exit: } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, pfn_t *pfn, bool write, bool *writable); + gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); static void make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, @@ -2956,7 +2957,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, int r; int level; bool force_pt_level = false; - pfn_t pfn; + kvm_pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; @@ -3410,7 +3411,7 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu) } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, pfn_t *pfn, bool write, bool *writable) + gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -3448,7 +3449,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, bool prefault) { - pfn_t pfn; + kvm_pfn_t pfn; int r; int level; bool force_pt_level; @@ -4601,7 +4602,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, u64 *sptep; struct rmap_iterator iter; int need_tlb_flush = 0; - pfn_t pfn; + kvm_pfn_t pfn; struct kvm_mmu_page *sp; restart: diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 1cee3ec20dd2..dcce533d420c 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -97,7 +97,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) { struct kvm_mmu_page *sp; gfn_t gfn; - pfn_t pfn; + kvm_pfn_t pfn; hpa_t hpa; sp = page_header(__pa(sptep)); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 91e939b486d1..6c9fed957cce 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -456,7 +456,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, { unsigned pte_access; gfn_t gfn; - pfn_t pfn; + kvm_pfn_t pfn; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) return false; @@ -551,7 +551,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, - pfn_t pfn, bool map_writable, bool prefault) + kvm_pfn_t pfn, bool map_writable, bool prefault) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; @@ -694,7 +694,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int user_fault = error_code & PFERR_USER_MASK; struct guest_walker walker; int r; - pfn_t pfn; + kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; bool force_pt_level = false; unsigned long mmu_seq; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 04d61d496b14..e2951b6edbbc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4251,7 +4251,7 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { int i, idx, r = 0; - pfn_t identity_map_pfn; + kvm_pfn_t identity_map_pfn; u32 tmp; if (!enable_ept) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f53f5b13c677..4244c2baf57d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5148,7 +5148,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, int emulation_type) { gpa_t gpa = cr2; - pfn_t pfn; + kvm_pfn_t pfn; if (emulation_type & EMULTYPE_NO_REEXECUTE) return false; diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index ae9a37bf1371..6d5eb5900372 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -9,6 +9,7 @@ #include <linux/vmstat.h> #include <linux/highmem.h> #include <linux/swap.h> +#include <linux/memremap.h> #include <asm/pgtable.h> @@ -63,6 +64,16 @@ retry: #endif } +static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) +{ + while ((*nr) - nr_start) { + struct page *page = pages[--(*nr)]; + + ClearPageReferenced(page); + put_page(page); + } +} + /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -71,7 +82,9 @@ retry: static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { + struct dev_pagemap *pgmap = NULL; unsigned long mask; + int nr_start = *nr; pte_t *ptep; mask = _PAGE_PRESENT|_PAGE_USER; @@ -89,13 +102,21 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } - if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { + page = pte_page(pte); + if (pte_devmap(pte)) { + pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + pte_unmap(ptep); + return 0; + } + } else if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { pte_unmap(ptep); return 0; } VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); get_page(page); + put_dev_pagemap(pgmap); SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -114,6 +135,32 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } +static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + int nr_start = *nr; + unsigned long pfn = pmd_pfn(pmd); + struct dev_pagemap *pgmap = NULL; + + pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT; + do { + struct page *page = pfn_to_page(pfn); + + pgmap = get_dev_pagemap(pfn, pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + SetPageReferenced(page); + pages[*nr] = page; + get_page(page); + put_dev_pagemap(pgmap); + (*nr)++; + pfn++; + } while (addr += PAGE_SIZE, addr != end); + return 1; +} + static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -126,9 +173,13 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, mask |= _PAGE_RW; if ((pmd_flags(pmd) & mask) != mask) return 0; + + VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); + if (pmd_devmap(pmd)) + return __gup_device_huge_pmd(pmd, addr, end, pages, nr); + /* hugepages are never "special" */ VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); refs = 0; head = pmd_page(pmd); @@ -136,8 +187,6 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, do { VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); (*nr)++; page++; refs++; @@ -158,18 +207,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush has to flush the tlb, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. gup-fast - * can't because it has irq disabled and - * wait_split_huge_page() would never return as the - * tlb flush IPI wouldn't run. - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { /* @@ -212,8 +250,6 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, do { VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec081fe0ce2c..5488d21123bd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/memremap.h> #include <linux/nmi.h> #include <linux/gfp.h> #include <linux/kcore.h> @@ -714,6 +715,12 @@ static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; + struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); + + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + return; + } /* bootmem page has reserved flag */ if (PageReserved(page)) { @@ -814,8 +821,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, if (phys_addr < (phys_addr_t)0x40000000) return; - if (IS_ALIGNED(addr, PAGE_SIZE) && - IS_ALIGNED(next, PAGE_SIZE)) { + if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { /* * Do not free direct mapping pages since they were * freed when offlining, or simplely not in use. @@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + struct vmem_altmap *altmap; struct zone *zone; int ret; - zone = page_zone(pfn_to_page(start_pfn)); - kernel_physical_mapping_remove(start, start + size); + /* With altmap the first mapped page is offset from @start */ + altmap = to_vmem_altmap((unsigned long) page); + if (altmap) + page += vmem_altmap_offset(altmap); + zone = page_zone(page); ret = __remove_pages(zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + kernel_physical_mapping_remove(start, start + size); return ret; } @@ -1236,7 +1248,7 @@ static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; static int __meminit vmemmap_populate_hugepages(unsigned long start, - unsigned long end, int node) + unsigned long end, int node, struct vmem_altmap *altmap) { unsigned long addr; unsigned long next; @@ -1259,7 +1271,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); + p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (p) { pte_t entry; @@ -1280,7 +1292,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, addr_end = addr + PMD_SIZE; p_end = p + PMD_SIZE; continue; - } + } else if (altmap) + return -ENOMEM; /* no fallback */ } else if (pmd_large(*pmd)) { vmemmap_verify((pte_t *)pmd, node, addr, next); continue; @@ -1294,11 +1307,16 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { + struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (cpu_has_pse) - err = vmemmap_populate_hugepages(start, end, node); - else + err = vmemmap_populate_hugepages(start, end, node, altmap); + else if (altmap) { + pr_err_once("%s: no cpu support for altmap allocations\n", + __func__); + err = -ENOMEM; + } else err = vmemmap_populate_basepages(start, end, node); if (!err) sync_global_pgds(start, end - 1, 0); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 844b06d67df4..96bd1e2bffaf 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -69,14 +69,14 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ if (mmap_is_ia32()) - rnd = (unsigned long)get_random_int() % (1<<8); +#ifdef CONFIG_COMPAT + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); +#else + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); +#endif else - rnd = (unsigned long)get_random_int() % (1<<28); + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6000ad7f560c..fc6a4c8f6e2a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -66,6 +66,9 @@ void update_page_count(int level, unsigned long pages) static void split_page_count(int level) { + if (direct_pages_count[level] == 0) + return; + direct_pages_count[level]--; direct_pages_count[level - 1] += PTRS_PER_PTE; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 031782e74231..f4ae536b0914 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -12,6 +12,7 @@ #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/pfn_t.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/fs.h> @@ -949,7 +950,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, } int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn) + pfn_t pfn) { enum page_cache_mode pcm; @@ -957,7 +958,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, return 0; /* Set prot based on lookup */ - pcm = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT); + pcm = lookup_memtype(pfn_t_to_phys(pfn)); *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | cachemode2protval(pcm)); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ee9c2e3a7199..4eb287e25043 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -505,19 +505,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, return young; } - -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - int set; - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - set = !test_and_set_bit(_PAGE_BIT_SPLITTING, - (unsigned long *)pmdp); - if (set) { - /* need tlb flush only to serialize against gup-fast */ - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - } -} #endif /** diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8ddb5d0d66fb..8f4cc3dfac32 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -161,7 +161,10 @@ void flush_tlb_current_task(void) preempt_disable(); count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + + /* This is an implicit full barrier that synchronizes with switch_mm. */ local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); - if (current->active_mm != mm) + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; + } if (!current->mm) { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; } if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) base_pages_to_flush = (end - start) >> PAGE_SHIFT; + /* + * Both branches below are implicit full barriers (MOV to CR or + * INVLPG) that synchronize with switch_mm. + */ if (base_pages_to_flush > tlb_single_page_flush_ceiling) { base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); @@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) preempt_disable(); if (current->active_mm == mm) { - if (current->mm) + if (current->mm) { + /* + * Implicit full barrier (INVLPG) that synchronizes + * with switch_mm. + */ __flush_tlb_one(start); - else + } else { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 755481f14d90..174781a404ff 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -36,13 +36,6 @@ #endif /* CONFIG_X86_PPRO_FENCE */ #define dma_wmb() barrier() -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() - -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) - -#define read_barrier_depends() do { } while (0) -#define smp_read_barrier_depends() do { } while (0) +#include <asm-generic/barrier.h> #endif |