summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/Makefile3
-rw-r--r--arch/x86/boot/compressed/Makefile6
-rw-r--r--arch/x86/boot/compressed/eboot.c889
-rw-r--r--arch/x86/boot/compressed/eboot.h31
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S29
-rw-r--r--arch/x86/boot/compressed/head_32.S92
-rw-r--r--arch/x86/boot/compressed/head_64.S209
-rw-r--r--arch/x86/boot/compressed/kaslr_64.c3
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/boot/header.S93
-rw-r--r--arch/x86/boot/setup.ld1
-rw-r--r--arch/x86/boot/tools/build.c106
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/crypto/Makefile7
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c2
-rw-r--r--arch/x86/crypto/crc32-pclmul_glue.c2
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c2
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c2
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c2
-rw-r--r--arch/x86/entry/Makefile1
-rw-r--r--arch/x86/entry/common.c19
-rw-r--r--arch/x86/entry/entry_32.S23
-rw-r--r--arch/x86/entry/entry_64.S4
-rw-r--r--arch/x86/entry/syscall_32.c19
-rw-r--r--arch/x86/entry/syscall_64.c39
-rw-r--r--arch/x86/entry/syscall_x32.c29
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl818
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl740
-rw-r--r--arch/x86/entry/syscalls/syscallhdr.sh7
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh44
-rw-r--r--arch/x86/entry/thunk_32.S8
-rw-r--r--arch/x86/entry/thunk_64.S8
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S7
-rw-r--r--arch/x86/entry/vdso/vdso32/vclock_gettime.c1
-rw-r--r--arch/x86/entry/vdso/vma.c8
-rw-r--r--arch/x86/events/amd/core.c1
-rw-r--r--arch/x86/events/amd/power.c2
-rw-r--r--arch/x86/events/amd/uncore.c61
-rw-r--r--arch/x86/events/core.c27
-rw-r--r--arch/x86/events/intel/core.c26
-rw-r--r--arch/x86/events/intel/cstate.c101
-rw-r--r--arch/x86/events/intel/ds.c2
-rw-r--r--arch/x86/events/intel/lbr.c9
-rw-r--r--arch/x86/events/intel/rapl.c58
-rw-r--r--arch/x86/events/intel/uncore.c75
-rw-r--r--arch/x86/events/intel/uncore.h7
-rw-r--r--arch/x86/events/intel/uncore_snb.c159
-rw-r--r--arch/x86/events/intel/uncore_snbep.c12
-rw-r--r--arch/x86/events/msr.c3
-rw-r--r--arch/x86/ia32/Makefile2
-rw-r--r--arch/x86/ia32/ia32_signal.c304
-rw-r--r--arch/x86/include/asm/amd_nb.h1
-rw-r--r--arch/x86/include/asm/asm.h6
-rw-r--r--arch/x86/include/asm/clocksource.h20
-rw-r--r--arch/x86/include/asm/cpu.h12
-rw-r--r--arch/x86/include/asm/cpu_device_id.h132
-rw-r--r--arch/x86/include/asm/cpufeatures.h5
-rw-r--r--arch/x86/include/asm/dwarf2.h4
-rw-r--r--arch/x86/include/asm/efi.h23
-rw-r--r--arch/x86/include/asm/futex.h99
-rw-r--r--arch/x86/include/asm/intel-family.h17
-rw-r--r--arch/x86/include/asm/io_bitmap.h9
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/kprobes.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h14
-rw-r--r--arch/x86/include/asm/kvm_host.h19
-rw-r--r--arch/x86/include/asm/mce.h9
-rw-r--r--arch/x86/include/asm/mshyperv.h4
-rw-r--r--arch/x86/include/asm/msr-index.h11
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/paravirt.h7
-rw-r--r--arch/x86/include/asm/paravirt_types.h4
-rw-r--r--arch/x86/include/asm/perf_event.h15
-rw-r--r--arch/x86/include/asm/pgtable.h13
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/include/asm/pkeys.h5
-rw-r--r--arch/x86/include/asm/preempt.h8
-rw-r--r--arch/x86/include/asm/processor.h13
-rw-r--r--arch/x86/include/asm/sections.h20
-rw-r--r--arch/x86/include/asm/set_memory.h2
-rw-r--r--arch/x86/include/asm/sigframe.h6
-rw-r--r--arch/x86/include/asm/sighandling.h7
-rw-r--r--arch/x86/include/asm/syscall.h16
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h287
-rw-r--r--arch/x86/include/asm/syscalls.h34
-rw-r--r--arch/x86/include/asm/thread_info.h14
-rw-r--r--arch/x86/include/asm/topology.h25
-rw-r--r--arch/x86/include/asm/traps.h17
-rw-r--r--arch/x86/include/asm/uaccess.h233
-rw-r--r--arch/x86/include/asm/uaccess_32.h27
-rw-r--r--arch/x86/include/asm/uaccess_64.h108
-rw-r--r--arch/x86/include/asm/unistd.h7
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/vdso/clocksource.h10
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h6
-rw-r--r--arch/x86/include/asm/vdso/processor.h23
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h15
-rw-r--r--arch/x86/include/asm/vgtod.h12
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/include/asm/vmxfeatures.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c12
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/acpi/sleep.h2
-rw-r--r--arch/x86/kernel/alternative.c4
-rw-r--r--arch/x86/kernel/amd_nb.c4
-rw-r--r--arch/x86/kernel/apic/apic.c32
-rw-r--r--arch/x86/kernel/apic/vector.c20
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/asm-offsets_32.c10
-rw-r--r--arch/x86/kernel/asm-offsets_64.c36
-rw-r--r--arch/x86/kernel/cpu/amd.c47
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/feat_ctl.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c183
-rw-r--r--arch/x86/kernel/cpu/match.c13
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c50
-rw-r--r--arch/x86/kernel/cpu/mce/core.c16
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c47
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c26
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h5
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c9
-rw-r--r--arch/x86/kernel/cpu/umwait.c1
-rw-r--r--arch/x86/kernel/cpu/vmware.c229
-rw-r--r--arch/x86/kernel/fpu/xstate.c75
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/ima_arch.c8
-rw-r--r--arch/x86/kernel/ioport.c1
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/irqinit.c18
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c5
-rw-r--r--arch/x86/kernel/kprobes/opt.c25
-rw-r--r--arch/x86/kernel/kvm.c65
-rw-r--r--arch/x86/kernel/kvmclock.c9
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/nmi.c4
-rw-r--r--arch/x86/kernel/paravirt.c5
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/process_32.c1
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/pvclock.c2
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S12
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/signal.c403
-rw-r--r--arch/x86/kernel/smpboot.c294
-rw-r--r--arch/x86/kernel/stacktrace.c6
-rw-r--r--arch/x86/kernel/sys_ia32.c (renamed from arch/x86/ia32/sys_ia32.c)143
-rw-r--r--arch/x86/kernel/sys_x86_64.c1
-rw-r--r--arch/x86/kernel/time.c27
-rw-r--r--arch/x86/kernel/topology.c22
-rw-r--r--arch/x86/kernel/traps.c65
-rw-r--r--arch/x86/kernel/tsc.c34
-rw-r--r--arch/x86/kernel/tsc_msr.c142
-rw-r--r--arch/x86/kernel/tsc_sync.c2
-rw-r--r--arch/x86/kernel/vm86_32.c115
-rw-r--r--arch/x86/kernel/vmlinux.lds.S8
-rw-r--r--arch/x86/kvm/Kconfig13
-rw-r--r--arch/x86/kvm/Makefile1
-rw-r--r--arch/x86/kvm/emulate.c37
-rw-r--r--arch/x86/kvm/ioapic.c7
-rw-r--r--arch/x86/kvm/irq_comm.c2
-rw-r--r--arch/x86/kvm/lapic.c20
-rw-r--r--arch/x86/kvm/mmu.h13
-rw-r--r--arch/x86/kvm/mmu/mmu.c11
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h4
-rw-r--r--arch/x86/kvm/mmutrace.h2
-rw-r--r--arch/x86/kvm/svm.c103
-rw-r--r--arch/x86/kvm/trace.h4
-rw-r--r--arch/x86/kvm/vmx/capabilities.h1
-rw-r--r--arch/x86/kvm/vmx/nested.c127
-rw-r--r--arch/x86/kvm/vmx/nested.h10
-rw-r--r--arch/x86/kvm/vmx/vmx.c174
-rw-r--r--arch/x86/kvm/vmx/vmx.h5
-rw-r--r--arch/x86/kvm/x86.c86
-rw-r--r--arch/x86/lib/x86-opcode-map.txt17
-rw-r--r--arch/x86/mm/dump_pagetables.c7
-rw-r--r--arch/x86/mm/extable.c12
-rw-r--r--arch/x86/mm/fault.c26
-rw-r--r--arch/x86/mm/init_32.c38
-rw-r--r--arch/x86/mm/init_64.c3
-rw-r--r--arch/x86/mm/ioremap.c21
-rw-r--r--arch/x86/mm/kmmio.c10
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/numa_emulation.c2
-rw-r--r--arch/x86/mm/pat/set_memory.c3
-rw-r--r--arch/x86/mm/pti.c8
-rw-r--r--arch/x86/net/bpf_jit_comp32.c10
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c13
-rw-r--r--arch/x86/platform/efi/efi.c288
-rw-r--r--arch/x86/platform/efi/efi_32.c13
-rw-r--r--arch/x86/platform/efi/efi_64.c165
-rw-r--r--arch/x86/platform/efi/efi_stub_32.S21
-rw-r--r--arch/x86/platform/efi/quirks.c13
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c5
-rw-r--r--arch/x86/platform/intel-quark/imr.c2
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c2
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c13
-rw-r--r--arch/x86/power/cpu.c16
-rw-r--r--arch/x86/realmode/rm/Makefile1
-rw-r--r--arch/x86/realmode/rm/realmode.lds.S1
-rw-r--r--arch/x86/um/Kconfig5
-rw-r--r--arch/x86/um/Makefile1
-rw-r--r--arch/x86/um/sys_call_table_32.c6
-rw-r--r--arch/x86/um/sys_call_table_64.c9
-rw-r--r--arch/x86/um/user-offsets.c15
-rw-r--r--arch/x86/xen/enlighten_pv.c32
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/time.c36
214 files changed, 4831 insertions, 4342 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index beea77046f9b..c1e1931f591f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -30,7 +30,6 @@ config X86_64
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select SWIOTLB
- select ARCH_HAS_SYSCALL_WRAPPER
config FORCE_DYNAMIC_FTRACE
def_bool y
@@ -57,7 +56,6 @@ config X86
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_32BIT_OFF_T if X86_32
- select ARCH_CLOCKSOURCE_DATA
select ARCH_CLOCKSOURCE_INIT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_DEBUG_VIRTUAL
@@ -80,6 +78,7 @@ config X86
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
+ select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
@@ -128,6 +127,7 @@ config X86
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
select GUP_GET_PTE_LOW_HIGH if X86_PAE
+ select HARDIRQS_SW_RESEND
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
@@ -240,11 +240,6 @@ config OUTPUT_FORMAT
default "elf32-i386" if X86_32
default "elf64-x86-64" if X86_64
-config ARCH_DEFCONFIG
- string
- default "arch/x86/configs/i386_defconfig" if X86_32
- default "arch/x86/configs/x86_64_defconfig" if X86_64
-
config LOCKDEP_SUPPORT
def_bool y
@@ -1875,7 +1870,6 @@ config X86_SMAP
config X86_UMIP
def_bool y
- depends on CPU_SUP_INTEL || CPU_SUP_AMD
prompt "User Mode Instruction Prevention" if EXPERT
---help---
User Mode Instruction Prevention (UMIP) is a security feature in
@@ -2418,7 +2412,7 @@ config CMDLINE
config CMDLINE_OVERRIDE
bool "Built-in command line overrides boot loader arguments"
- depends on CMDLINE_BOOL
+ depends on CMDLINE_BOOL && CMDLINE != ""
---help---
Set this option to 'Y' to have the kernel ignore the boot loader
command line, and use ONLY the built-in command line.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94df0868804b..513a55562d75 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -194,9 +194,10 @@ avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
+adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 012b82fc8617..e17be90ab312 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -68,6 +68,7 @@ clean-files += cpustr.h
KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
GCOV_PROFILE := n
UBSAN_SANITIZE := n
@@ -88,7 +89,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 26050ae0b27e..5f7c262bcc99 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -39,6 +39,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
@@ -87,10 +88,7 @@ endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
-$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o \
- $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
deleted file mode 100644
index 287393d725f0..000000000000
--- a/arch/x86/boot/compressed/eboot.c
+++ /dev/null
@@ -1,889 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-/* -----------------------------------------------------------------------
- *
- * Copyright 2011 Intel Corporation; author Matt Fleming
- *
- * ----------------------------------------------------------------------- */
-
-#pragma GCC visibility push(hidden)
-
-#include <linux/efi.h>
-#include <linux/pci.h>
-
-#include <asm/efi.h>
-#include <asm/e820/types.h>
-#include <asm/setup.h>
-#include <asm/desc.h>
-#include <asm/boot.h>
-
-#include "../string.h"
-#include "eboot.h"
-
-static efi_system_table_t *sys_table;
-extern const bool efi_is64;
-
-__pure efi_system_table_t *efi_system_table(void)
-{
- return sys_table;
-}
-
-__attribute_const__ bool efi_is_64bit(void)
-{
- if (IS_ENABLED(CONFIG_EFI_MIXED))
- return efi_is64;
- return IS_ENABLED(CONFIG_X86_64);
-}
-
-static efi_status_t
-preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
-{
- struct pci_setup_rom *rom = NULL;
- efi_status_t status;
- unsigned long size;
- uint64_t romsize;
- void *romimage;
-
- /*
- * Some firmware images contain EFI function pointers at the place where
- * the romimage and romsize fields are supposed to be. Typically the EFI
- * code is mapped at high addresses, translating to an unrealistically
- * large romsize. The UEFI spec limits the size of option ROMs to 16
- * MiB so we reject any ROMs over 16 MiB in size to catch this.
- */
- romimage = efi_table_attr(pci, romimage);
- romsize = efi_table_attr(pci, romsize);
- if (!romimage || !romsize || romsize > SZ_16M)
- return EFI_INVALID_PARAMETER;
-
- size = romsize + sizeof(*rom);
-
- status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
- (void **)&rom);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'rom'\n");
- return status;
- }
-
- memset(rom, 0, sizeof(*rom));
-
- rom->data.type = SETUP_PCI;
- rom->data.len = size - sizeof(struct setup_data);
- rom->data.next = 0;
- rom->pcilen = pci->romsize;
- *__rom = rom;
-
- status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
- PCI_VENDOR_ID, 1, &rom->vendor);
-
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to read rom->vendor\n");
- goto free_struct;
- }
-
- status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
- PCI_DEVICE_ID, 1, &rom->devid);
-
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to read rom->devid\n");
- goto free_struct;
- }
-
- status = efi_call_proto(pci, get_location, &rom->segment, &rom->bus,
- &rom->device, &rom->function);
-
- if (status != EFI_SUCCESS)
- goto free_struct;
-
- memcpy(rom->romdata, romimage, romsize);
- return status;
-
-free_struct:
- efi_bs_call(free_pool, rom);
- return status;
-}
-
-/*
- * There's no way to return an informative status from this function,
- * because any analysis (and printing of error messages) needs to be
- * done directly at the EFI function call-site.
- *
- * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we
- * just didn't find any PCI devices, but there's no way to tell outside
- * the context of the call.
- */
-static void setup_efi_pci(struct boot_params *params)
-{
- efi_status_t status;
- void **pci_handle = NULL;
- efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
- unsigned long size = 0;
- struct setup_data *data;
- efi_handle_t h;
- int i;
-
- status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
- &pci_proto, NULL, &size, pci_handle);
-
- if (status == EFI_BUFFER_TOO_SMALL) {
- status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
- (void **)&pci_handle);
-
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'pci_handle'\n");
- return;
- }
-
- status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
- &pci_proto, NULL, &size, pci_handle);
- }
-
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- for_each_efi_handle(h, pci_handle, size, i) {
- efi_pci_io_protocol_t *pci = NULL;
- struct pci_setup_rom *rom;
-
- status = efi_bs_call(handle_protocol, h, &pci_proto,
- (void **)&pci);
- if (status != EFI_SUCCESS || !pci)
- continue;
-
- status = preserve_pci_rom_image(pci, &rom);
- if (status != EFI_SUCCESS)
- continue;
-
- if (data)
- data->next = (unsigned long)rom;
- else
- params->hdr.setup_data = (unsigned long)rom;
-
- data = (struct setup_data *)rom;
- }
-
-free_handle:
- efi_bs_call(free_pool, pci_handle);
-}
-
-static void retrieve_apple_device_properties(struct boot_params *boot_params)
-{
- efi_guid_t guid = APPLE_PROPERTIES_PROTOCOL_GUID;
- struct setup_data *data, *new;
- efi_status_t status;
- u32 size = 0;
- apple_properties_protocol_t *p;
-
- status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&p);
- if (status != EFI_SUCCESS)
- return;
-
- if (efi_table_attr(p, version) != 0x10000) {
- efi_printk("Unsupported properties proto version\n");
- return;
- }
-
- efi_call_proto(p, get_all, NULL, &size);
- if (!size)
- return;
-
- do {
- status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
- size + sizeof(struct setup_data),
- (void **)&new);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'properties'\n");
- return;
- }
-
- status = efi_call_proto(p, get_all, new->data, &size);
-
- if (status == EFI_BUFFER_TOO_SMALL)
- efi_bs_call(free_pool, new);
- } while (status == EFI_BUFFER_TOO_SMALL);
-
- new->type = SETUP_APPLE_PROPERTIES;
- new->len = size;
- new->next = 0;
-
- data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
- if (!data) {
- boot_params->hdr.setup_data = (unsigned long)new;
- } else {
- while (data->next)
- data = (struct setup_data *)(unsigned long)data->next;
- data->next = (unsigned long)new;
- }
-}
-
-static const efi_char16_t apple[] = L"Apple";
-
-static void setup_quirks(struct boot_params *boot_params)
-{
- efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
- efi_table_attr(efi_system_table(), fw_vendor);
-
- if (!memcmp(fw_vendor, apple, sizeof(apple))) {
- if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
- retrieve_apple_device_properties(boot_params);
- }
-}
-
-/*
- * See if we have Universal Graphics Adapter (UGA) protocol
- */
-static efi_status_t
-setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
-{
- efi_status_t status;
- u32 width, height;
- void **uga_handle = NULL;
- efi_uga_draw_protocol_t *uga = NULL, *first_uga;
- efi_handle_t handle;
- int i;
-
- status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
- (void **)&uga_handle);
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
- uga_proto, NULL, &size, uga_handle);
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- height = 0;
- width = 0;
-
- first_uga = NULL;
- for_each_efi_handle(handle, uga_handle, size, i) {
- efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u32 w, h, depth, refresh;
- void *pciio;
-
- status = efi_bs_call(handle_protocol, handle, uga_proto,
- (void **)&uga);
- if (status != EFI_SUCCESS)
- continue;
-
- pciio = NULL;
- efi_bs_call(handle_protocol, handle, &pciio_proto, &pciio);
-
- status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh);
- if (status == EFI_SUCCESS && (!first_uga || pciio)) {
- width = w;
- height = h;
-
- /*
- * Once we've found a UGA supporting PCIIO,
- * don't bother looking any further.
- */
- if (pciio)
- break;
-
- first_uga = uga;
- }
- }
-
- if (!width && !height)
- goto free_handle;
-
- /* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
- si->lfb_depth = 32;
- si->lfb_width = width;
- si->lfb_height = height;
-
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
-
-free_handle:
- efi_bs_call(free_pool, uga_handle);
-
- return status;
-}
-
-void setup_graphics(struct boot_params *boot_params)
-{
- efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
- struct screen_info *si;
- efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
- efi_status_t status;
- unsigned long size;
- void **gop_handle = NULL;
- void **uga_handle = NULL;
-
- si = &boot_params->screen_info;
- memset(si, 0, sizeof(*si));
-
- size = 0;
- status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
- &graphics_proto, NULL, &size, gop_handle);
- if (status == EFI_BUFFER_TOO_SMALL)
- status = efi_setup_gop(si, &graphics_proto, size);
-
- if (status != EFI_SUCCESS) {
- size = 0;
- status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
- &uga_proto, NULL, &size, uga_handle);
- if (status == EFI_BUFFER_TOO_SMALL)
- setup_uga(si, &uga_proto, size);
- }
-}
-
-void startup_32(struct boot_params *boot_params);
-
-void __noreturn efi_stub_entry(efi_handle_t handle,
- efi_system_table_t *sys_table_arg,
- struct boot_params *boot_params);
-
-/*
- * Because the x86 boot code expects to be passed a boot_params we
- * need to create one ourselves (usually the bootloader would create
- * one for us).
- */
-efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
- efi_system_table_t *sys_table_arg)
-{
- struct boot_params *boot_params;
- struct apm_bios_info *bi;
- struct setup_header *hdr;
- efi_loaded_image_t *image;
- efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
- int options_size = 0;
- efi_status_t status;
- char *cmdline_ptr;
- unsigned long ramdisk_addr;
- unsigned long ramdisk_size;
-
- sys_table = sys_table_arg;
-
- /* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- return EFI_INVALID_PARAMETER;
-
- status = efi_bs_call(handle_protocol, handle, &proto, (void *)&image);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
- return status;
- }
-
- status = efi_low_alloc(0x4000, 1, (unsigned long *)&boot_params);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate lowmem for boot params\n");
- return status;
- }
-
- memset(boot_params, 0x0, 0x4000);
-
- hdr = &boot_params->hdr;
- bi = &boot_params->apm_bios_info;
-
- /* Copy the second sector to boot_params */
- memcpy(&hdr->jump, image->image_base + 512, 512);
-
- /*
- * Fill out some of the header fields ourselves because the
- * EFI firmware loader doesn't load the first sector.
- */
- hdr->root_flags = 1;
- hdr->vid_mode = 0xffff;
- hdr->boot_flag = 0xAA55;
-
- hdr->type_of_loader = 0x21;
-
- /* Convert unicode cmdline to ascii */
- cmdline_ptr = efi_convert_cmdline(image, &options_size);
- if (!cmdline_ptr)
- goto fail;
-
- hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
- /* Fill in upper bits of command line address, NOP on 32 bit */
- boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
-
- hdr->ramdisk_image = 0;
- hdr->ramdisk_size = 0;
-
- /* Clear APM BIOS info */
- memset(bi, 0, sizeof(*bi));
-
- status = efi_parse_options(cmdline_ptr);
- if (status != EFI_SUCCESS)
- goto fail2;
-
- status = handle_cmdline_files(image,
- (char *)(unsigned long)hdr->cmd_line_ptr,
- "initrd=", hdr->initrd_addr_max,
- &ramdisk_addr, &ramdisk_size);
-
- if (status != EFI_SUCCESS &&
- hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) {
- efi_printk("Trying to load files to higher address\n");
- status = handle_cmdline_files(image,
- (char *)(unsigned long)hdr->cmd_line_ptr,
- "initrd=", -1UL,
- &ramdisk_addr, &ramdisk_size);
- }
-
- if (status != EFI_SUCCESS)
- goto fail2;
- hdr->ramdisk_image = ramdisk_addr & 0xffffffff;
- hdr->ramdisk_size = ramdisk_size & 0xffffffff;
- boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
- boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
-
- hdr->code32_start = (u32)(unsigned long)startup_32;
-
- efi_stub_entry(handle, sys_table, boot_params);
- /* not reached */
-
-fail2:
- efi_free(options_size, hdr->cmd_line_ptr);
-fail:
- efi_free(0x4000, (unsigned long)boot_params);
-
- return status;
-}
-
-static void add_e820ext(struct boot_params *params,
- struct setup_data *e820ext, u32 nr_entries)
-{
- struct setup_data *data;
-
- e820ext->type = SETUP_E820_EXT;
- e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
- e820ext->next = 0;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- if (data)
- data->next = (unsigned long)e820ext;
- else
- params->hdr.setup_data = (unsigned long)e820ext;
-}
-
-static efi_status_t
-setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
-{
- struct boot_e820_entry *entry = params->e820_table;
- struct efi_info *efi = &params->efi_info;
- struct boot_e820_entry *prev = NULL;
- u32 nr_entries;
- u32 nr_desc;
- int i;
-
- nr_entries = 0;
- nr_desc = efi->efi_memmap_size / efi->efi_memdesc_size;
-
- for (i = 0; i < nr_desc; i++) {
- efi_memory_desc_t *d;
- unsigned int e820_type = 0;
- unsigned long m = efi->efi_memmap;
-
-#ifdef CONFIG_X86_64
- m |= (u64)efi->efi_memmap_hi << 32;
-#endif
-
- d = efi_early_memdesc_ptr(m, efi->efi_memdesc_size, i);
- switch (d->type) {
- case EFI_RESERVED_TYPE:
- case EFI_RUNTIME_SERVICES_CODE:
- case EFI_RUNTIME_SERVICES_DATA:
- case EFI_MEMORY_MAPPED_IO:
- case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
- case EFI_PAL_CODE:
- e820_type = E820_TYPE_RESERVED;
- break;
-
- case EFI_UNUSABLE_MEMORY:
- e820_type = E820_TYPE_UNUSABLE;
- break;
-
- case EFI_ACPI_RECLAIM_MEMORY:
- e820_type = E820_TYPE_ACPI;
- break;
-
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
- case EFI_BOOT_SERVICES_CODE:
- case EFI_BOOT_SERVICES_DATA:
- case EFI_CONVENTIONAL_MEMORY:
- if (efi_soft_reserve_enabled() &&
- (d->attribute & EFI_MEMORY_SP))
- e820_type = E820_TYPE_SOFT_RESERVED;
- else
- e820_type = E820_TYPE_RAM;
- break;
-
- case EFI_ACPI_MEMORY_NVS:
- e820_type = E820_TYPE_NVS;
- break;
-
- case EFI_PERSISTENT_MEMORY:
- e820_type = E820_TYPE_PMEM;
- break;
-
- default:
- continue;
- }
-
- /* Merge adjacent mappings */
- if (prev && prev->type == e820_type &&
- (prev->addr + prev->size) == d->phys_addr) {
- prev->size += d->num_pages << 12;
- continue;
- }
-
- if (nr_entries == ARRAY_SIZE(params->e820_table)) {
- u32 need = (nr_desc - i) * sizeof(struct e820_entry) +
- sizeof(struct setup_data);
-
- if (!e820ext || e820ext_size < need)
- return EFI_BUFFER_TOO_SMALL;
-
- /* boot_params map full, switch to e820 extended */
- entry = (struct boot_e820_entry *)e820ext->data;
- }
-
- entry->addr = d->phys_addr;
- entry->size = d->num_pages << PAGE_SHIFT;
- entry->type = e820_type;
- prev = entry++;
- nr_entries++;
- }
-
- if (nr_entries > ARRAY_SIZE(params->e820_table)) {
- u32 nr_e820ext = nr_entries - ARRAY_SIZE(params->e820_table);
-
- add_e820ext(params, e820ext, nr_e820ext);
- nr_entries -= nr_e820ext;
- }
-
- params->e820_entries = (u8)nr_entries;
-
- return EFI_SUCCESS;
-}
-
-static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
- u32 *e820ext_size)
-{
- efi_status_t status;
- unsigned long size;
-
- size = sizeof(struct setup_data) +
- sizeof(struct e820_entry) * nr_desc;
-
- if (*e820ext) {
- efi_bs_call(free_pool, *e820ext);
- *e820ext = NULL;
- *e820ext_size = 0;
- }
-
- status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
- (void **)e820ext);
- if (status == EFI_SUCCESS)
- *e820ext_size = size;
-
- return status;
-}
-
-static efi_status_t allocate_e820(struct boot_params *params,
- struct setup_data **e820ext,
- u32 *e820ext_size)
-{
- unsigned long map_size, desc_size, buff_size;
- struct efi_boot_memmap boot_map;
- efi_memory_desc_t *map;
- efi_status_t status;
- __u32 nr_desc;
-
- boot_map.map = &map;
- boot_map.map_size = &map_size;
- boot_map.desc_size = &desc_size;
- boot_map.desc_ver = NULL;
- boot_map.key_ptr = NULL;
- boot_map.buff_size = &buff_size;
-
- status = efi_get_memory_map(&boot_map);
- if (status != EFI_SUCCESS)
- return status;
-
- nr_desc = buff_size / desc_size;
-
- if (nr_desc > ARRAY_SIZE(params->e820_table)) {
- u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table);
-
- status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
- }
-
- return EFI_SUCCESS;
-}
-
-struct exit_boot_struct {
- struct boot_params *boot_params;
- struct efi_info *efi;
-};
-
-static efi_status_t exit_boot_func(struct efi_boot_memmap *map,
- void *priv)
-{
- const char *signature;
- struct exit_boot_struct *p = priv;
-
- signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
- : EFI32_LOADER_SIGNATURE;
- memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
-
- p->efi->efi_systab = (unsigned long)efi_system_table();
- p->efi->efi_memdesc_size = *map->desc_size;
- p->efi->efi_memdesc_version = *map->desc_ver;
- p->efi->efi_memmap = (unsigned long)*map->map;
- p->efi->efi_memmap_size = *map->map_size;
-
-#ifdef CONFIG_X86_64
- p->efi->efi_systab_hi = (unsigned long)efi_system_table() >> 32;
- p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
-#endif
-
- return EFI_SUCCESS;
-}
-
-static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
-{
- unsigned long map_sz, key, desc_size, buff_size;
- efi_memory_desc_t *mem_map;
- struct setup_data *e820ext = NULL;
- __u32 e820ext_size = 0;
- efi_status_t status;
- __u32 desc_version;
- struct efi_boot_memmap map;
- struct exit_boot_struct priv;
-
- map.map = &mem_map;
- map.map_size = &map_sz;
- map.desc_size = &desc_size;
- map.desc_ver = &desc_version;
- map.key_ptr = &key;
- map.buff_size = &buff_size;
- priv.boot_params = boot_params;
- priv.efi = &boot_params->efi_info;
-
- status = allocate_e820(boot_params, &e820ext, &e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
-
- /* Might as well exit boot services now */
- status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func);
- if (status != EFI_SUCCESS)
- return status;
-
- /* Historic? */
- boot_params->alt_mem_k = 32 * 1024;
-
- status = setup_e820(boot_params, e820ext, e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
-
- return EFI_SUCCESS;
-}
-
-/*
- * On success we return a pointer to a boot_params structure, and NULL
- * on failure.
- */
-struct boot_params *efi_main(efi_handle_t handle,
- efi_system_table_t *sys_table_arg,
- struct boot_params *boot_params)
-{
- struct desc_ptr *gdt = NULL;
- struct setup_header *hdr = &boot_params->hdr;
- efi_status_t status;
- struct desc_struct *desc;
- unsigned long cmdline_paddr;
-
- sys_table = sys_table_arg;
-
- /* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- goto fail;
-
- /*
- * make_boot_params() may have been called before efi_main(), in which
- * case this is the second time we parse the cmdline. This is ok,
- * parsing the cmdline multiple times does not have side-effects.
- */
- cmdline_paddr = ((u64)hdr->cmd_line_ptr |
- ((u64)boot_params->ext_cmd_line_ptr << 32));
- efi_parse_options((char *)cmdline_paddr);
-
- /*
- * If the boot loader gave us a value for secure_boot then we use that,
- * otherwise we ask the BIOS.
- */
- if (boot_params->secure_boot == efi_secureboot_mode_unset)
- boot_params->secure_boot = efi_get_secureboot();
-
- /* Ask the firmware to clear memory on unclean shutdown */
- efi_enable_reset_attack_mitigation();
-
- efi_random_get_seed();
-
- efi_retrieve_tpm2_eventlog();
-
- setup_graphics(boot_params);
-
- setup_efi_pci(boot_params);
-
- setup_quirks(boot_params);
-
- status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt),
- (void **)&gdt);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'gdt' structure\n");
- goto fail;
- }
-
- gdt->size = 0x800;
- status = efi_low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'gdt'\n");
- goto fail;
- }
-
- /*
- * If the kernel isn't already loaded at the preferred load
- * address, relocate it.
- */
- if (hdr->pref_address != hdr->code32_start) {
- unsigned long bzimage_addr = hdr->code32_start;
- status = efi_relocate_kernel(&bzimage_addr,
- hdr->init_size, hdr->init_size,
- hdr->pref_address,
- hdr->kernel_alignment,
- LOAD_PHYSICAL_ADDR);
- if (status != EFI_SUCCESS) {
- efi_printk("efi_relocate_kernel() failed!\n");
- goto fail;
- }
-
- hdr->pref_address = hdr->code32_start;
- hdr->code32_start = bzimage_addr;
- }
-
- status = exit_boot(boot_params, handle);
- if (status != EFI_SUCCESS) {
- efi_printk("exit_boot() failed!\n");
- goto fail;
- }
-
- memset((char *)gdt->address, 0x0, gdt->size);
- desc = (struct desc_struct *)gdt->address;
-
- /* The first GDT is a dummy. */
- desc++;
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- /* __KERNEL32_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
-
- desc++;
- } else {
- /* Second entry is unused on 32-bit */
- desc++;
- }
-
- /* __KERNEL_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- desc->l = 1;
- desc->d = 0;
- } else {
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- }
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
- desc++;
-
- /* __KERNEL_DS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
- desc++;
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- /* Task segment value */
- desc->limit0 = 0x0000;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_TSS;
- desc->s = 0;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0x0;
- desc->avl = 0;
- desc->l = 0;
- desc->d = 0;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
- desc++;
- }
-
- asm volatile("cli");
- asm volatile ("lgdt %0" : : "m" (*gdt));
-
- return boot_params;
-fail:
- efi_printk("efi_main() failed!\n");
-
- for (;;)
- asm("hlt");
-}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
deleted file mode 100644
index 99f35343d443..000000000000
--- a/arch/x86/boot/compressed/eboot.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BOOT_COMPRESSED_EBOOT_H
-#define BOOT_COMPRESSED_EBOOT_H
-
-#define SEG_TYPE_DATA (0 << 3)
-#define SEG_TYPE_READ_WRITE (1 << 1)
-#define SEG_TYPE_CODE (1 << 3)
-#define SEG_TYPE_EXEC_READ (1 << 1)
-#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
-#define SEG_OP_SIZE_32BIT (1 << 0)
-#define SEG_GRANULARITY_4KB (1 << 0)
-
-#define DESC_TYPE_CODE_DATA (1 << 0)
-
-typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
-
-union efi_uga_draw_protocol {
- struct {
- efi_status_t (__efiapi *get_mode)(efi_uga_draw_protocol_t *,
- u32*, u32*, u32*, u32*);
- void *set_mode;
- void *blt;
- };
- struct {
- u32 get_mode;
- u32 set_mode;
- u32 blt;
- } mixed_mode;
-};
-
-#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index 8fb7f6799c52..2b2049259619 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -54,11 +54,16 @@ SYM_FUNC_START(__efi64_thunk)
* Switch to gdt with 32-bit segments. This is the firmware GDT
* that was installed when the kernel started executing. This
* pointer was saved at the EFI stub entry point in head_64.S.
+ *
+ * Pass the saved DS selector to the 32-bit code, and use far return to
+ * restore the saved CS selector.
*/
leaq efi32_boot_gdt(%rip), %rax
lgdt (%rax)
- pushq $__KERNEL_CS
+ movzwl efi32_boot_ds(%rip), %edx
+ movzwq efi32_boot_cs(%rip), %rax
+ pushq %rax
leaq efi_enter32(%rip), %rax
pushq %rax
lretq
@@ -73,6 +78,10 @@ SYM_FUNC_START(__efi64_thunk)
movl %ebx, %es
pop %rbx
movl %ebx, %ds
+ /* Clear out 32-bit selector from FS and GS */
+ xorl %ebx, %ebx
+ movl %ebx, %fs
+ movl %ebx, %gs
/*
* Convert 32-bit status code into 64-bit.
@@ -92,10 +101,12 @@ SYM_FUNC_END(__efi64_thunk)
* The stack should represent the 32-bit calling convention.
*/
SYM_FUNC_START_LOCAL(efi_enter32)
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
+ /* Load firmware selector into data and stack segment registers */
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+ movl %edx, %ss
/* Reload pgtables */
movl %cr3, %eax
@@ -157,6 +168,14 @@ SYM_DATA_START(efi32_boot_gdt)
.quad 0
SYM_DATA_END(efi32_boot_gdt)
+SYM_DATA_START(efi32_boot_cs)
+ .word 0
+SYM_DATA_END(efi32_boot_cs)
+
+SYM_DATA_START(efi32_boot_ds)
+ .word 0
+SYM_DATA_END(efi32_boot_ds)
+
SYM_DATA_START(efi_gdt64)
.word efi_gdt64_end - efi_gdt64
.long 0 /* Filled out by user */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 73f17d0544dd..ab3307036ba4 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -63,21 +63,7 @@
__HEAD
SYM_FUNC_START(startup_32)
cld
- /*
- * Test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments
- */
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
- jnz 1f
-
cli
- movl $__BOOT_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %fs
- movl %eax, %gs
- movl %eax, %ss
-1:
/*
* Calculate the delta between where we were compiled to run
@@ -89,32 +75,59 @@ SYM_FUNC_START(startup_32)
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
-1: popl %ebp
- subl $1b, %ebp
+1: popl %edx
+ subl $1b, %edx
+
+ /* Load new GDT */
+ leal gdt(%edx), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
/*
- * %ebp contains the address we are loaded at by the boot loader and %ebx
+ * %edx contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
- * for safe in-place decompression.
+ * for safe in-place decompression. %ebp contains the address that the kernel
+ * will be decompressed to.
*/
#ifdef CONFIG_RELOCATABLE
- movl %ebp, %ebx
+ movl %edx, %ebx
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32() will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry() will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ subl image_offset(%edx), %ebx
+#endif
+
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
notl %eax
andl %eax, %ebx
cmpl $LOAD_PHYSICAL_ADDR, %ebx
- jge 1f
+ jae 1f
#endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
+ movl %ebx, %ebp // Save the output address for later
/* Target address to relocate to for decompression */
- movl BP_init_size(%esi), %eax
- subl $_end, %eax
- addl %eax, %ebx
+ addl BP_init_size(%esi), %ebx
+ subl $_end, %ebx
/* Set up the stack */
leal boot_stack_end(%ebx), %esp
@@ -128,7 +141,7 @@ SYM_FUNC_START(startup_32)
* where decompression in place becomes safe.
*/
pushl %esi
- leal (_bss-4)(%ebp), %esi
+ leal (_bss-4)(%edx), %esi
leal (_bss-4)(%ebx), %edi
movl $(_bss - startup_32), %ecx
shrl $2, %ecx
@@ -137,6 +150,15 @@ SYM_FUNC_START(startup_32)
cld
popl %esi
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leal gdt(%ebx), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
/*
* Jump to the relocated address.
*/
@@ -148,9 +170,8 @@ SYM_FUNC_END(startup_32)
SYM_FUNC_START(efi32_stub_entry)
SYM_FUNC_START_ALIAS(efi_stub_entry)
add $0x4, %esp
+ movl 8(%esp), %esi /* save boot_params pointer */
call efi_main
- movl %eax, %esi
- movl BP_code32_start(%esi), %eax
leal startup_32(%eax), %eax
jmp *%eax
SYM_FUNC_END(efi32_stub_entry)
@@ -189,9 +210,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
/* push arguments for extract_kernel: */
pushl $z_output_len /* decompressed length, end of relocs */
- leal _end(%ebx), %eax
- subl BP_init_size(%esi), %eax
- pushl %eax /* output address */
+ pushl %ebp /* output address */
pushl $z_input_len /* input_len */
leal input_data(%ebx), %eax
@@ -209,6 +228,21 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
jmp *%eax
SYM_FUNC_END(.Lrelocated)
+ .data
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
+ .word 0
+ .quad 0x0000000000000000 /* Reserved */
+ .quad 0x00cf9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+#ifdef CONFIG_EFI_STUB
+SYM_DATA(image_offset, .long 0)
+#endif
+
/*
* Stack and heap for uncompression
*/
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1f1f6c8139b3..4f7e6b84be07 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -53,19 +53,7 @@ SYM_FUNC_START(startup_32)
* all need to be under the 4G limit.
*/
cld
- /*
- * Test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments
- */
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
- jnz 1f
-
cli
- movl $(__BOOT_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
-1:
/*
* Calculate the delta between where we were compiled to run
@@ -80,10 +68,21 @@ SYM_FUNC_START(startup_32)
1: popl %ebp
subl $1b, %ebp
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal gdt(%ebp), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+
/* setup a stack and make sure cpu supports long mode. */
- movl $boot_stack_end, %eax
- addl %ebp, %eax
- movl %eax, %esp
+ leal boot_stack_end(%ebp), %esp
call verify_cpu
testl %eax, %eax
@@ -100,30 +99,38 @@ SYM_FUNC_START(startup_32)
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ subl image_offset(%ebp), %ebx
+#endif
+
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
notl %eax
andl %eax, %ebx
cmpl $LOAD_PHYSICAL_ADDR, %ebx
- jge 1f
+ jae 1f
#endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
/* Target address to relocate to for decompression */
- movl BP_init_size(%esi), %eax
- subl $_end, %eax
- addl %eax, %ebx
+ addl BP_init_size(%esi), %ebx
+ subl $_end, %ebx
/*
* Prepare for entering 64 bit mode
*/
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- addl %ebp, gdt+2(%ebp)
- lgdt gdt(%ebp)
-
/* Enable PAE mode */
movl %cr4, %eax
orl $X86_CR4_PAE, %eax
@@ -212,8 +219,13 @@ SYM_FUNC_START(startup_32)
cmp $0, %edi
jz 1f
leal efi64_stub_entry(%ebp), %eax
- movl %esi, %edx
movl efi32_boot_args+4(%ebp), %esi
+ movl efi32_boot_args+8(%ebp), %edx // saved bootparams pointer
+ cmpl $0, %edx
+ jnz 1f
+ leal efi_pe_entry(%ebp), %eax
+ movl %edi, %ecx // MS calling convention
+ movl %esi, %edx
1:
#endif
pushl %eax
@@ -238,11 +250,17 @@ SYM_FUNC_START(efi32_stub_entry)
1: pop %ebp
subl $1b, %ebp
+ movl %esi, efi32_boot_args+8(%ebp)
+SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
movl %ecx, efi32_boot_args(%ebp)
movl %edx, efi32_boot_args+4(%ebp)
- sgdtl efi32_boot_gdt(%ebp)
movb $0, efi_is64(%ebp)
+ /* Save firmware GDTR and code/data selectors */
+ sgdtl efi32_boot_gdt(%ebp)
+ movw %cs, efi32_boot_cs(%ebp)
+ movw %ds, efi32_boot_ds(%ebp)
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
@@ -266,6 +284,9 @@ SYM_CODE_START(startup_64)
* and command line.
*/
+ cld
+ cli
+
/* Setup data segments. */
xorl %eax, %eax
movl %eax, %ds
@@ -290,13 +311,27 @@ SYM_CODE_START(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ movl image_offset(%rip), %eax
+ subq %rax, %rbp
+#endif
+
movl BP_kernel_alignment(%rsi), %eax
decl %eax
addq %rax, %rbp
notq %rax
andq %rax, %rbp
cmpq $LOAD_PHYSICAL_ADDR, %rbp
- jge 1f
+ jae 1f
#endif
movq $LOAD_PHYSICAL_ADDR, %rbp
1:
@@ -354,9 +389,9 @@ SYM_CODE_START(startup_64)
*/
/* Make sure we have GDT with 32-bit code segment */
- leaq gdt(%rip), %rax
- movq %rax, gdt64+2(%rip)
- lgdt gdt64(%rip)
+ leaq gdt64(%rip), %rax
+ addq %rax, 2(%rax)
+ lgdt (%rax)
/*
* paging_prepare() sets up the trampoline and checks if we need to
@@ -441,6 +476,16 @@ trampoline_return:
cld
popq %rsi
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leaq gdt64(%rbx), %rax
+ leaq gdt(%rbx), %rdx
+ movq %rdx, 2(%rax)
+ lgdt (%rax)
+
/*
* Jump to the relocated address.
*/
@@ -453,9 +498,9 @@ SYM_CODE_END(startup_64)
SYM_FUNC_START(efi64_stub_entry)
SYM_FUNC_START_ALIAS(efi_stub_entry)
and $~0xf, %rsp /* realign the stack */
+ movq %rdx, %rbx /* save boot_params pointer */
call efi_main
- movq %rax,%rsi
- movl BP_code32_start(%esi), %eax
+ movq %rbx,%rsi
leaq startup_64(%rax), %rax
jmp *%rax
SYM_FUNC_END(efi64_stub_entry)
@@ -484,7 +529,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
leaq input_data(%rip), %rdx /* input_data */
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
- movq $z_output_len, %r9 /* decompressed length, end of relocs */
+ movl $z_output_len, %r9d /* decompressed length, end of relocs */
call extract_kernel /* returns kernel location in %rax */
popq %rsi
@@ -613,13 +658,13 @@ SYM_FUNC_END(.Lno_longmode)
.data
SYM_DATA_START_LOCAL(gdt64)
- .word gdt_end - gdt
- .quad 0
+ .word gdt_end - gdt - 1
+ .quad gdt - gdt64
SYM_DATA_END(gdt64)
.balign 8
SYM_DATA_START_LOCAL(gdt)
- .word gdt_end - gdt
- .long gdt
+ .word gdt_end - gdt - 1
+ .long 0
.word 0
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
@@ -628,9 +673,97 @@ SYM_DATA_START_LOCAL(gdt)
.quad 0x0000000000000000 /* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+#ifdef CONFIG_EFI_STUB
+SYM_DATA(image_offset, .long 0)
+#endif
+
#ifdef CONFIG_EFI_MIXED
-SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0)
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
SYM_DATA(efi_is64, .byte 1)
+
+#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
+#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
+#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
+
+ .text
+ .code32
+SYM_FUNC_START(efi32_pe_entry)
+/*
+ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
+ * efi_system_table_32_t *sys_table)
+ */
+
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %eax // dummy push to allocate loaded_image
+
+ pushl %ebx // save callee-save registers
+ pushl %edi
+
+ call verify_cpu // check for long mode support
+ testl %eax, %eax
+ movl $0x80000003, %eax // EFI_UNSUPPORTED
+ jnz 2f
+
+ call 1f
+1: pop %ebx
+ subl $1b, %ebx
+
+ /* Get the loaded image protocol pointer from the image handle */
+ leal -4(%ebp), %eax
+ pushl %eax // &loaded_image
+ leal loaded_image_proto(%ebx), %eax
+ pushl %eax // pass the GUID address
+ pushl 8(%ebp) // pass the image handle
+
+ /*
+ * Note the alignment of the stack frame.
+ * sys_table
+ * handle <-- 16-byte aligned on entry by ABI
+ * return address
+ * frame pointer
+ * loaded_image <-- local variable
+ * saved %ebx <-- 16-byte aligned here
+ * saved %edi
+ * &loaded_image
+ * &loaded_image_proto
+ * handle <-- 16-byte aligned for call to handle_protocol
+ */
+
+ movl 12(%ebp), %eax // sys_table
+ movl ST32_boottime(%eax), %eax // sys_table->boottime
+ call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
+ addl $12, %esp // restore argument space
+ testl %eax, %eax
+ jnz 2f
+
+ movl 8(%ebp), %ecx // image_handle
+ movl 12(%ebp), %edx // sys_table
+ movl -4(%ebp), %esi // loaded_image
+ movl LI32_image_base(%esi), %esi // loaded_image->image_base
+ movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
+ /*
+ * We need to set the image_offset variable here since startup_32() will
+ * use it before we get to the 64-bit efi_pe_entry() in C code.
+ */
+ subl %esi, %ebx
+ movl %ebx, image_offset(%ebp) // save image_offset
+ jmp efi32_pe_stub_entry
+
+2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+ ret
+SYM_FUNC_END(efi32_pe_entry)
+
+ .section ".rodata"
+ /* EFI loaded image protocol GUID */
+ .balign 4
+SYM_DATA_START_LOCAL(loaded_image_proto)
+ .long 0x5b1b31a1
+ .word 0x9562, 0x11d2
+ .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
+SYM_DATA_END(loaded_image_proto)
#endif
/*
@@ -647,7 +780,7 @@ SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
/*
* Space for page tables (not in .bss so not zeroed)
*/
- .section ".pgtable","a",@nobits
+ .section ".pgtable","aw",@nobits
.balign 4096
SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c
index 748456c365f4..9557c5a15b91 100644
--- a/arch/x86/boot/compressed/kaslr_64.c
+++ b/arch/x86/boot/compressed/kaslr_64.c
@@ -29,9 +29,6 @@
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"
-/* Used by pgtable.h asm code to force instruction serialization. */
-unsigned long __force_order;
-
/* Used to track our page table allocation area. */
struct alloc_pgt_data {
unsigned char *pgt_buf;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index c8181392f70d..726e264410ff 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -59,7 +59,7 @@ void __puthex(unsigned long value);
static inline void debug_putstr(const char *s)
{ }
-static inline void debug_puthex(const char *s)
+static inline void debug_puthex(unsigned long value)
{ }
#define debug_putaddr(x) /* */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 97d9b6d6c1af..735ad7f21ab0 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -15,7 +15,7 @@
* hex while segment addresses are written as segment:offset.
*
*/
-
+#include <linux/pe.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/page_types.h>
@@ -43,8 +43,7 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
bootsect_start:
#ifdef CONFIG_EFI_STUB
# "MZ", MS-DOS header
- .byte 0x4d
- .byte 0x5a
+ .word MZ_MAGIC
#endif
# Normalize the start address
@@ -97,39 +96,30 @@ bugger_off_msg:
#ifdef CONFIG_EFI_STUB
pe_header:
- .ascii "PE"
- .word 0
+ .long PE_MAGIC
coff_header:
#ifdef CONFIG_X86_32
- .word 0x14c # i386
+ .set image_file_add_flags, IMAGE_FILE_32BIT_MACHINE
+ .set pe_opt_magic, PE_OPT_MAGIC_PE32
+ .word IMAGE_FILE_MACHINE_I386
#else
- .word 0x8664 # x86-64
+ .set image_file_add_flags, 0
+ .set pe_opt_magic, PE_OPT_MAGIC_PE32PLUS
+ .word IMAGE_FILE_MACHINE_AMD64
#endif
- .word 4 # nr_sections
+ .word section_count # nr_sections
.long 0 # TimeDateStamp
.long 0 # PointerToSymbolTable
.long 1 # NumberOfSymbols
.word section_table - optional_header # SizeOfOptionalHeader
-#ifdef CONFIG_X86_32
- .word 0x306 # Characteristics.
- # IMAGE_FILE_32BIT_MACHINE |
- # IMAGE_FILE_DEBUG_STRIPPED |
- # IMAGE_FILE_EXECUTABLE_IMAGE |
- # IMAGE_FILE_LINE_NUMS_STRIPPED
-#else
- .word 0x206 # Characteristics
- # IMAGE_FILE_DEBUG_STRIPPED |
- # IMAGE_FILE_EXECUTABLE_IMAGE |
- # IMAGE_FILE_LINE_NUMS_STRIPPED
-#endif
+ .word IMAGE_FILE_EXECUTABLE_IMAGE | \
+ image_file_add_flags | \
+ IMAGE_FILE_DEBUG_STRIPPED | \
+ IMAGE_FILE_LINE_NUMS_STRIPPED # Characteristics
optional_header:
-#ifdef CONFIG_X86_32
- .word 0x10b # PE32 format
-#else
- .word 0x20b # PE32+ format
-#endif
+ .word pe_opt_magic
.byte 0x02 # MajorLinkerVersion
.byte 0x14 # MinorLinkerVersion
@@ -148,17 +138,19 @@ optional_header:
#endif
extra_header_fields:
+ # PE specification requires ImageBase to be 64k aligned
+ .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff
#ifdef CONFIG_X86_32
- .long 0 # ImageBase
+ .long image_base # ImageBase
#else
- .quad 0 # ImageBase
+ .quad image_base # ImageBase
#endif
.long 0x20 # SectionAlignment
.long 0x20 # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
- .word 0 # MajorImageVersion
- .word 0 # MinorImageVersion
+ .word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion
+ .word LINUX_EFISTUB_MINOR_VERSION # MinorImageVersion
.word 0 # MajorSubsystemVersion
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue
@@ -170,7 +162,7 @@ extra_header_fields:
.long 0x200 # SizeOfHeaders
.long 0 # CheckSum
- .word 0xa # Subsystem (EFI application)
+ .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
.word 0 # DllCharacteristics
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
@@ -184,7 +176,7 @@ extra_header_fields:
.quad 0 # SizeOfHeapCommit
#endif
.long 0 # LoaderFlags
- .long 0x6 # NumberOfRvaAndSizes
+ .long (section_table - .) / 8 # NumberOfRvaAndSizes
.quad 0 # ExportTable
.quad 0 # ImportTable
@@ -210,7 +202,10 @@ section_table:
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
- .long 0x60500020 # Characteristics (section flags)
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE | \
+ IMAGE_SCN_ALIGN_16BYTES # Characteristics
#
# The EFI application loader requires a relocation section
@@ -228,45 +223,53 @@ section_table:
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
- .long 0x42100040 # Characteristics (section flags)
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_DISCARDABLE | \
+ IMAGE_SCN_ALIGN_1BYTES # Characteristics
+#ifdef CONFIG_EFI_MIXED
#
# The offset & size fields are filled in by build.c.
#
- .ascii ".text"
- .byte 0
- .byte 0
- .byte 0
+ .asciz ".compat"
.long 0
- .long 0x0 # startup_{32,64}
+ .long 0x0
.long 0 # Size of initialized data
# on disk
- .long 0x0 # startup_{32,64}
+ .long 0x0
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
- .long 0x60500020 # Characteristics (section flags)
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_DISCARDABLE | \
+ IMAGE_SCN_ALIGN_1BYTES # Characteristics
+#endif
#
# The offset & size fields are filled in by build.c.
#
- .ascii ".bss"
- .byte 0
+ .ascii ".text"
.byte 0
.byte 0
.byte 0
.long 0
- .long 0x0
+ .long 0x0 # startup_{32,64}
.long 0 # Size of initialized data
# on disk
- .long 0x0
+ .long 0x0 # startup_{32,64}
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
- .long 0xc8000080 # Characteristics (section flags)
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE | \
+ IMAGE_SCN_ALIGN_16BYTES # Characteristics
+ .set section_count, (. - section_table) / 40
#endif /* CONFIG_EFI_STUB */
# Kernel attributes; used by setup. This is part 1 of the
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 3da1c37c6dd5..24c95522f231 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -52,7 +52,6 @@ SECTIONS
_end = .;
/DISCARD/ : {
- *(.eh_frame)
*(.note*)
}
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 55e669d29e54..8f8c8e386cea 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -53,11 +53,20 @@ u8 buf[SETUP_SECT_MAX*512];
#define PECOFF_RELOC_RESERVE 0x20
+#ifdef CONFIG_EFI_MIXED
+#define PECOFF_COMPAT_RESERVE 0x20
+#else
+#define PECOFF_COMPAT_RESERVE 0x0
+#endif
+
unsigned long efi32_stub_entry;
unsigned long efi64_stub_entry;
unsigned long efi_pe_entry;
+unsigned long efi32_pe_entry;
unsigned long kernel_info;
unsigned long startup_64;
+unsigned long _ehead;
+unsigned long _end;
/*----------------------------------------------------------------------*/
@@ -189,7 +198,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz
static void update_pecoff_setup_and_reloc(unsigned int size)
{
u32 setup_offset = 0x200;
- u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
+ u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE;
+#ifdef CONFIG_EFI_MIXED
+ u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE;
+#endif
u32 setup_size = reloc_offset - setup_offset;
update_pecoff_section_header(".setup", setup_offset, setup_size);
@@ -201,43 +213,59 @@ static void update_pecoff_setup_and_reloc(unsigned int size)
*/
put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
put_unaligned_le32(10, &buf[reloc_offset + 4]);
+
+#ifdef CONFIG_EFI_MIXED
+ update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE);
+
+ /*
+ * Put the IA-32 machine type (0x14c) and the associated entry point
+ * address in the .compat section, so loaders can figure out which other
+ * execution modes this image supports.
+ */
+ buf[compat_offset] = 0x1;
+ buf[compat_offset + 1] = 0x8;
+ put_unaligned_le16(0x14c, &buf[compat_offset + 2]);
+ put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]);
+#endif
}
-static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
+static void update_pecoff_text(unsigned int text_start, unsigned int file_sz,
+ unsigned int init_sz)
{
unsigned int pe_header;
unsigned int text_sz = file_sz - text_start;
+ unsigned int bss_sz = init_sz - file_sz;
pe_header = get_unaligned_le32(&buf[0x3c]);
/*
+ * The PE/COFF loader may load the image at an address which is
+ * misaligned with respect to the kernel_alignment field in the setup
+ * header.
+ *
+ * In order to avoid relocating the kernel to correct the misalignment,
+ * add slack to allow the buffer to be aligned within the declared size
+ * of the image.
+ */
+ bss_sz += CONFIG_PHYSICAL_ALIGN;
+ init_sz += CONFIG_PHYSICAL_ALIGN;
+
+ /*
* Size of code: Subtract the size of the first sector (512 bytes)
* which includes the header.
*/
- put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
+ put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]);
+
+ /* Size of image */
+ put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
/*
* Address of entry point for PE/COFF executable
*/
put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
- update_pecoff_section_header(".text", text_start, text_sz);
-}
-
-static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
-{
- unsigned int pe_header;
- unsigned int bss_sz = init_sz - file_sz;
-
- pe_header = get_unaligned_le32(&buf[0x3c]);
-
- /* Size of uninitialized data */
- put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
-
- /* Size of image */
- put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
-
- update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
+ update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz,
+ text_sz, text_start);
}
static int reserve_pecoff_reloc_section(int c)
@@ -278,9 +306,8 @@ static void efi_stub_entry_update(void)
static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
static inline void update_pecoff_text(unsigned int text_start,
- unsigned int file_sz) {}
-static inline void update_pecoff_bss(unsigned int file_sz,
- unsigned int init_sz) {}
+ unsigned int file_sz,
+ unsigned int init_sz) {}
static inline void efi_stub_defaults(void) {}
static inline void efi_stub_entry_update(void) {}
@@ -290,6 +317,12 @@ static inline int reserve_pecoff_reloc_section(int c)
}
#endif /* CONFIG_EFI_STUB */
+static int reserve_pecoff_compat_section(int c)
+{
+ /* Reserve 0x20 bytes for .compat section */
+ memset(buf+c, 0, PECOFF_COMPAT_RESERVE);
+ return PECOFF_COMPAT_RESERVE;
+}
/*
* Parse zoffset.h and find the entry points. We could just #include zoffset.h
@@ -322,8 +355,11 @@ static void parse_zoffset(char *fname)
PARSE_ZOFS(p, efi32_stub_entry);
PARSE_ZOFS(p, efi64_stub_entry);
PARSE_ZOFS(p, efi_pe_entry);
+ PARSE_ZOFS(p, efi32_pe_entry);
PARSE_ZOFS(p, kernel_info);
PARSE_ZOFS(p, startup_64);
+ PARSE_ZOFS(p, _ehead);
+ PARSE_ZOFS(p, _end);
p = strchr(p, '\n');
while (p && (*p == '\r' || *p == '\n'))
@@ -365,6 +401,7 @@ int main(int argc, char ** argv)
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);
+ c += reserve_pecoff_compat_section(c);
c += reserve_pecoff_reloc_section(c);
/* Pad unused space with zeros */
@@ -406,9 +443,28 @@ int main(int argc, char ** argv)
buf[0x1f1] = setup_sectors-1;
put_unaligned_le32(sys_size, &buf[0x1f4]);
- update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
init_sz = get_unaligned_le32(&buf[0x260]);
- update_pecoff_bss(i + (sys_size * 16), init_sz);
+#ifdef CONFIG_EFI_STUB
+ /*
+ * The decompression buffer will start at ImageBase. When relocating
+ * the compressed kernel to its end, we must ensure that the head
+ * section does not get overwritten. The head section occupies
+ * [i, i + _ehead), and the destination is [init_sz - _end, init_sz).
+ *
+ * At present these should never overlap, because 'i' is at most 32k
+ * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the
+ * calculation of INIT_SIZE in boot/header.S ensures that
+ * 'init_sz - _end' is at least 64k.
+ *
+ * For future-proofing, increase init_sz if necessary.
+ */
+
+ if (init_sz - _end < i + _ehead) {
+ init_sz = (i + _ehead + _end + 4095) & ~4095;
+ put_unaligned_le32(init_sz, &buf[0x260]);
+ }
+#endif
+ update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz);
efi_stub_entry_update();
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 59ce9ed58430..5b602beb0b72 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -125,7 +125,6 @@ CONFIG_IP6_NF_MANGLE=y
CONFIG_NET_SCHED=y
CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
-CONFIG_HAMRADIO=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
@@ -171,7 +170,6 @@ CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
CONFIG_R8169=y
-CONFIG_FDDI=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 0b9654c7a05c..f3d1f36103b1 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -123,7 +123,6 @@ CONFIG_IP6_NF_MANGLE=y
CONFIG_NET_SCHED=y
CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
-CONFIG_HAMRADIO=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
@@ -164,7 +163,6 @@ CONFIG_SKY2=y
CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
CONFIG_R8169=y
-CONFIG_FDDI=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b69e00bf20b8..8c2e9eadee8a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
+adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no)
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+
+# These modules require the assembler to support ADX.
+ifeq ($(adx_supported),yes)
+ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+endif
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index bbbebbd35b5d..75b6ea20491e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1064,7 +1064,7 @@ static struct aead_alg aesni_aeads[0];
static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
+ X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 418bd88acac8..7c4c7b2fbf05 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -170,7 +170,7 @@ static struct shash_alg alg = {
};
static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+ X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index c20d1b8a82c3..d2d069bd459b 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -221,7 +221,7 @@ static struct shash_alg alg = {
};
static const struct x86_cpu_id crc32c_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+ X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index 3c81e15b0873..71291d5af9f4 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -114,7 +114,7 @@ static struct shash_alg alg = {
};
static const struct x86_cpu_id crct10dif_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+ X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index a4b728518e28..1f1a95f3dd0c 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -313,7 +313,7 @@ static struct ahash_alg ghash_async_alg = {
};
static const struct x86_cpu_id pcmul_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
+ X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 06fc70cf5433..85eb381259c2 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -14,4 +14,5 @@ obj-y += vdso/
obj-y += vsyscall/
obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
+obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9747876980b5..76735ec813e6 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -34,6 +34,7 @@
#include <asm/fpu/api.h>
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
+#include <asm/syscall.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -333,20 +334,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
if (likely(nr < IA32_NR_syscalls)) {
nr = array_index_nospec(nr, IA32_NR_syscalls);
-#ifdef CONFIG_IA32_EMULATION
regs->ax = ia32_sys_call_table[nr](regs);
-#else
- /*
- * It's possible that a 32-bit syscall implementation
- * takes a 64-bit parameter but nonetheless assumes that
- * the high bits are zero. Make sure we zero-extend all
- * of the args.
- */
- regs->ax = ia32_sys_call_table[nr](
- (unsigned int)regs->bx, (unsigned int)regs->cx,
- (unsigned int)regs->dx, (unsigned int)regs->si,
- (unsigned int)regs->di, (unsigned int)regs->bp);
-#endif /* CONFIG_IA32_EMULATION */
}
syscall_return_slowpath(regs);
@@ -438,3 +426,8 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
#endif
}
#endif
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+ return -ENOSYS;
+}
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7e0560442538..b67bae7091d7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1088,10 +1088,10 @@ SYM_FUNC_START(entry_INT80_32)
STACKLEAK_ERASE
restore_all:
- TRACE_IRQS_IRET
+ TRACE_IRQS_ON
SWITCH_TO_ENTRY_STACK
CHECK_AND_APPLY_ESPFIX
-.Lrestore_nocheck:
+
/* Switch back to user CR3 */
SWITCH_TO_USER_CR3 scratch_reg=%eax
@@ -1290,7 +1290,7 @@ SYM_CODE_END(simd_coprocessor_error)
SYM_CODE_START(device_not_available)
ASM_CLAC
- pushl $-1 # mark this as an int
+ pushl $0
pushl $do_device_not_available
jmp common_exception
SYM_CODE_END(device_not_available)
@@ -1365,7 +1365,7 @@ SYM_CODE_END(divide_error)
SYM_CODE_START(machine_check)
ASM_CLAC
pushl $0
- pushl machine_check_vector
+ pushl $do_mce
jmp common_exception
SYM_CODE_END(machine_check)
#endif
@@ -1531,7 +1531,7 @@ SYM_CODE_START(debug)
* Entry from sysenter is now handled in common_exception
*/
ASM_CLAC
- pushl $-1 # mark this as an int
+ pushl $0
pushl $do_debug
jmp common_exception
SYM_CODE_END(debug)
@@ -1682,18 +1682,13 @@ SYM_CODE_END(nmi)
SYM_CODE_START(int3)
ASM_CLAC
- pushl $-1 # mark this as an int
-
- SAVE_ALL switch_stacks=1
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- xorl %edx, %edx # zero error code
- movl %esp, %eax # pt_regs pointer
- call do_int3
- jmp ret_from_exception
+ pushl $0
+ pushl $do_int3
+ jmp common_exception
SYM_CODE_END(int3)
SYM_CODE_START(general_protection)
+ ASM_CLAC
pushl $do_general_protection
jmp common_exception
SYM_CODE_END(general_protection)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f2bb91e87877..0e9504fabe52 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -174,7 +174,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
movq %rsp, %rsi
call do_syscall_64 /* returns with IRQs disabled */
- TRACE_IRQS_IRETQ /* we're about to change IF */
+ TRACE_IRQS_ON /* return enables interrupts */
/*
* Try to use SYSRET instead of IRET if we're returning to
@@ -619,7 +619,7 @@ ret_from_intr:
.Lretint_user:
mov %rsp,%rdi
call prepare_exit_to_usermode
- TRACE_IRQS_IRETQ
+ TRACE_IRQS_ON
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 7d17b3addbbb..86eb0d89d46f 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -4,29 +4,22 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
-#include <asm/asm-offsets.h>
+#include <linux/syscalls.h>
+#include <asm/unistd.h>
#include <asm/syscall.h>
-#ifdef CONFIG_IA32_EMULATION
-/* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-#define __sys_ni_syscall __ia32_sys_ni_syscall
-#else /* CONFIG_IA32_EMULATION */
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-#define __sys_ni_syscall sys_ni_syscall
-#endif /* CONFIG_IA32_EMULATION */
+#define __SYSCALL_I386(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_I386(nr, sym) [nr] = __ia32_##sym,
-__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
+ [0 ... __NR_ia32_syscall_max] = &__ia32_sys_ni_syscall,
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index adf619a856e8..1594ec72bcbb 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -5,24 +5,17 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
-#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
#include <asm/syscall.h>
-extern asmlinkage long sys_ni_syscall(void);
+#define __SYSCALL_X32(nr, sym)
+#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
-SYSCALL_DEFINE0(ni_syscall)
-{
- return sys_ni_syscall();
-}
-
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_64(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#undef __SYSCALL_X32
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
-#define __SYSCALL_X32(nr, sym, qual)
+#define __SYSCALL_64(nr, sym) [nr] = __x64_##sym,
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
@@ -32,25 +25,3 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
#include <asm/syscalls_64.h>
};
-
-#undef __SYSCALL_64
-#undef __SYSCALL_X32
-
-#ifdef CONFIG_X86_X32_ABI
-
-#define __SYSCALL_64(nr, sym, qual)
-#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
-
-asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
- /*
- * Smells like a compiler bug -- it doesn't work
- * when the & below is removed.
- */
- [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
-
-#undef __SYSCALL_64
-#undef __SYSCALL_X32
-
-#endif
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
new file mode 100644
index 000000000000..3d8d70d3896c
--- /dev/null
+++ b/arch/x86/entry/syscall_x32.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* System call table for x32 ABI. */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+#include <linux/syscalls.h>
+#include <asm/unistd.h>
+#include <asm/syscall.h>
+
+#define __SYSCALL_64(nr, sym)
+
+#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
+#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#include <asm/syscalls_64.h>
+#undef __SYSCALL_X32
+#undef __SYSCALL_COMMON
+
+#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
+#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
+
+asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c17cb77eb150..54581ac671b4 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -11,434 +11,434 @@
#
# The abi is always "i386" for this file.
#
-0 i386 restart_syscall sys_restart_syscall __ia32_sys_restart_syscall
-1 i386 exit sys_exit __ia32_sys_exit
-2 i386 fork sys_fork __ia32_sys_fork
-3 i386 read sys_read __ia32_sys_read
-4 i386 write sys_write __ia32_sys_write
-5 i386 open sys_open __ia32_compat_sys_open
-6 i386 close sys_close __ia32_sys_close
-7 i386 waitpid sys_waitpid __ia32_sys_waitpid
-8 i386 creat sys_creat __ia32_sys_creat
-9 i386 link sys_link __ia32_sys_link
-10 i386 unlink sys_unlink __ia32_sys_unlink
-11 i386 execve sys_execve __ia32_compat_sys_execve
-12 i386 chdir sys_chdir __ia32_sys_chdir
-13 i386 time sys_time32 __ia32_sys_time32
-14 i386 mknod sys_mknod __ia32_sys_mknod
-15 i386 chmod sys_chmod __ia32_sys_chmod
-16 i386 lchown sys_lchown16 __ia32_sys_lchown16
+0 i386 restart_syscall sys_restart_syscall
+1 i386 exit sys_exit
+2 i386 fork sys_fork
+3 i386 read sys_read
+4 i386 write sys_write
+5 i386 open sys_open compat_sys_open
+6 i386 close sys_close
+7 i386 waitpid sys_waitpid
+8 i386 creat sys_creat
+9 i386 link sys_link
+10 i386 unlink sys_unlink
+11 i386 execve sys_execve compat_sys_execve
+12 i386 chdir sys_chdir
+13 i386 time sys_time32
+14 i386 mknod sys_mknod
+15 i386 chmod sys_chmod
+16 i386 lchown sys_lchown16
17 i386 break
-18 i386 oldstat sys_stat __ia32_sys_stat
-19 i386 lseek sys_lseek __ia32_compat_sys_lseek
-20 i386 getpid sys_getpid __ia32_sys_getpid
-21 i386 mount sys_mount __ia32_compat_sys_mount
-22 i386 umount sys_oldumount __ia32_sys_oldumount
-23 i386 setuid sys_setuid16 __ia32_sys_setuid16
-24 i386 getuid sys_getuid16 __ia32_sys_getuid16
-25 i386 stime sys_stime32 __ia32_sys_stime32
-26 i386 ptrace sys_ptrace __ia32_compat_sys_ptrace
-27 i386 alarm sys_alarm __ia32_sys_alarm
-28 i386 oldfstat sys_fstat __ia32_sys_fstat
-29 i386 pause sys_pause __ia32_sys_pause
-30 i386 utime sys_utime32 __ia32_sys_utime32
+18 i386 oldstat sys_stat
+19 i386 lseek sys_lseek compat_sys_lseek
+20 i386 getpid sys_getpid
+21 i386 mount sys_mount compat_sys_mount
+22 i386 umount sys_oldumount
+23 i386 setuid sys_setuid16
+24 i386 getuid sys_getuid16
+25 i386 stime sys_stime32
+26 i386 ptrace sys_ptrace compat_sys_ptrace
+27 i386 alarm sys_alarm
+28 i386 oldfstat sys_fstat
+29 i386 pause sys_pause
+30 i386 utime sys_utime32
31 i386 stty
32 i386 gtty
-33 i386 access sys_access __ia32_sys_access
-34 i386 nice sys_nice __ia32_sys_nice
+33 i386 access sys_access
+34 i386 nice sys_nice
35 i386 ftime
-36 i386 sync sys_sync __ia32_sys_sync
-37 i386 kill sys_kill __ia32_sys_kill
-38 i386 rename sys_rename __ia32_sys_rename
-39 i386 mkdir sys_mkdir __ia32_sys_mkdir
-40 i386 rmdir sys_rmdir __ia32_sys_rmdir
-41 i386 dup sys_dup __ia32_sys_dup
-42 i386 pipe sys_pipe __ia32_sys_pipe
-43 i386 times sys_times __ia32_compat_sys_times
+36 i386 sync sys_sync
+37 i386 kill sys_kill
+38 i386 rename sys_rename
+39 i386 mkdir sys_mkdir
+40 i386 rmdir sys_rmdir
+41 i386 dup sys_dup
+42 i386 pipe sys_pipe
+43 i386 times sys_times compat_sys_times
44 i386 prof
-45 i386 brk sys_brk __ia32_sys_brk
-46 i386 setgid sys_setgid16 __ia32_sys_setgid16
-47 i386 getgid sys_getgid16 __ia32_sys_getgid16
-48 i386 signal sys_signal __ia32_sys_signal
-49 i386 geteuid sys_geteuid16 __ia32_sys_geteuid16
-50 i386 getegid sys_getegid16 __ia32_sys_getegid16
-51 i386 acct sys_acct __ia32_sys_acct
-52 i386 umount2 sys_umount __ia32_sys_umount
+45 i386 brk sys_brk
+46 i386 setgid sys_setgid16
+47 i386 getgid sys_getgid16
+48 i386 signal sys_signal
+49 i386 geteuid sys_geteuid16
+50 i386 getegid sys_getegid16
+51 i386 acct sys_acct
+52 i386 umount2 sys_umount
53 i386 lock
-54 i386 ioctl sys_ioctl __ia32_compat_sys_ioctl
-55 i386 fcntl sys_fcntl __ia32_compat_sys_fcntl64
+54 i386 ioctl sys_ioctl compat_sys_ioctl
+55 i386 fcntl sys_fcntl compat_sys_fcntl64
56 i386 mpx
-57 i386 setpgid sys_setpgid __ia32_sys_setpgid
+57 i386 setpgid sys_setpgid
58 i386 ulimit
-59 i386 oldolduname sys_olduname __ia32_sys_olduname
-60 i386 umask sys_umask __ia32_sys_umask
-61 i386 chroot sys_chroot __ia32_sys_chroot
-62 i386 ustat sys_ustat __ia32_compat_sys_ustat
-63 i386 dup2 sys_dup2 __ia32_sys_dup2
-64 i386 getppid sys_getppid __ia32_sys_getppid
-65 i386 getpgrp sys_getpgrp __ia32_sys_getpgrp
-66 i386 setsid sys_setsid __ia32_sys_setsid
-67 i386 sigaction sys_sigaction __ia32_compat_sys_sigaction
-68 i386 sgetmask sys_sgetmask __ia32_sys_sgetmask
-69 i386 ssetmask sys_ssetmask __ia32_sys_ssetmask
-70 i386 setreuid sys_setreuid16 __ia32_sys_setreuid16
-71 i386 setregid sys_setregid16 __ia32_sys_setregid16
-72 i386 sigsuspend sys_sigsuspend __ia32_sys_sigsuspend
-73 i386 sigpending sys_sigpending __ia32_compat_sys_sigpending
-74 i386 sethostname sys_sethostname __ia32_sys_sethostname
-75 i386 setrlimit sys_setrlimit __ia32_compat_sys_setrlimit
-76 i386 getrlimit sys_old_getrlimit __ia32_compat_sys_old_getrlimit
-77 i386 getrusage sys_getrusage __ia32_compat_sys_getrusage
-78 i386 gettimeofday sys_gettimeofday __ia32_compat_sys_gettimeofday
-79 i386 settimeofday sys_settimeofday __ia32_compat_sys_settimeofday
-80 i386 getgroups sys_getgroups16 __ia32_sys_getgroups16
-81 i386 setgroups sys_setgroups16 __ia32_sys_setgroups16
-82 i386 select sys_old_select __ia32_compat_sys_old_select
-83 i386 symlink sys_symlink __ia32_sys_symlink
-84 i386 oldlstat sys_lstat __ia32_sys_lstat
-85 i386 readlink sys_readlink __ia32_sys_readlink
-86 i386 uselib sys_uselib __ia32_sys_uselib
-87 i386 swapon sys_swapon __ia32_sys_swapon
-88 i386 reboot sys_reboot __ia32_sys_reboot
-89 i386 readdir sys_old_readdir __ia32_compat_sys_old_readdir
-90 i386 mmap sys_old_mmap __ia32_compat_sys_x86_mmap
-91 i386 munmap sys_munmap __ia32_sys_munmap
-92 i386 truncate sys_truncate __ia32_compat_sys_truncate
-93 i386 ftruncate sys_ftruncate __ia32_compat_sys_ftruncate
-94 i386 fchmod sys_fchmod __ia32_sys_fchmod
-95 i386 fchown sys_fchown16 __ia32_sys_fchown16
-96 i386 getpriority sys_getpriority __ia32_sys_getpriority
-97 i386 setpriority sys_setpriority __ia32_sys_setpriority
+59 i386 oldolduname sys_olduname
+60 i386 umask sys_umask
+61 i386 chroot sys_chroot
+62 i386 ustat sys_ustat compat_sys_ustat
+63 i386 dup2 sys_dup2
+64 i386 getppid sys_getppid
+65 i386 getpgrp sys_getpgrp
+66 i386 setsid sys_setsid
+67 i386 sigaction sys_sigaction compat_sys_sigaction
+68 i386 sgetmask sys_sgetmask
+69 i386 ssetmask sys_ssetmask
+70 i386 setreuid sys_setreuid16
+71 i386 setregid sys_setregid16
+72 i386 sigsuspend sys_sigsuspend
+73 i386 sigpending sys_sigpending compat_sys_sigpending
+74 i386 sethostname sys_sethostname
+75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
+76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+77 i386 getrusage sys_getrusage compat_sys_getrusage
+78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
+80 i386 getgroups sys_getgroups16
+81 i386 setgroups sys_setgroups16
+82 i386 select sys_old_select compat_sys_old_select
+83 i386 symlink sys_symlink
+84 i386 oldlstat sys_lstat
+85 i386 readlink sys_readlink
+86 i386 uselib sys_uselib
+87 i386 swapon sys_swapon
+88 i386 reboot sys_reboot
+89 i386 readdir sys_old_readdir compat_sys_old_readdir
+90 i386 mmap sys_old_mmap compat_sys_ia32_mmap
+91 i386 munmap sys_munmap
+92 i386 truncate sys_truncate compat_sys_truncate
+93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
+94 i386 fchmod sys_fchmod
+95 i386 fchown sys_fchown16
+96 i386 getpriority sys_getpriority
+97 i386 setpriority sys_setpriority
98 i386 profil
-99 i386 statfs sys_statfs __ia32_compat_sys_statfs
-100 i386 fstatfs sys_fstatfs __ia32_compat_sys_fstatfs
-101 i386 ioperm sys_ioperm __ia32_sys_ioperm
-102 i386 socketcall sys_socketcall __ia32_compat_sys_socketcall
-103 i386 syslog sys_syslog __ia32_sys_syslog
-104 i386 setitimer sys_setitimer __ia32_compat_sys_setitimer
-105 i386 getitimer sys_getitimer __ia32_compat_sys_getitimer
-106 i386 stat sys_newstat __ia32_compat_sys_newstat
-107 i386 lstat sys_newlstat __ia32_compat_sys_newlstat
-108 i386 fstat sys_newfstat __ia32_compat_sys_newfstat
-109 i386 olduname sys_uname __ia32_sys_uname
-110 i386 iopl sys_iopl __ia32_sys_iopl
-111 i386 vhangup sys_vhangup __ia32_sys_vhangup
+99 i386 statfs sys_statfs compat_sys_statfs
+100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
+101 i386 ioperm sys_ioperm
+102 i386 socketcall sys_socketcall compat_sys_socketcall
+103 i386 syslog sys_syslog
+104 i386 setitimer sys_setitimer compat_sys_setitimer
+105 i386 getitimer sys_getitimer compat_sys_getitimer
+106 i386 stat sys_newstat compat_sys_newstat
+107 i386 lstat sys_newlstat compat_sys_newlstat
+108 i386 fstat sys_newfstat compat_sys_newfstat
+109 i386 olduname sys_uname
+110 i386 iopl sys_iopl
+111 i386 vhangup sys_vhangup
112 i386 idle
-113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall
-114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4
-115 i386 swapoff sys_swapoff __ia32_sys_swapoff
-116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo
-117 i386 ipc sys_ipc __ia32_compat_sys_ipc
-118 i386 fsync sys_fsync __ia32_sys_fsync
-119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn
-120 i386 clone sys_clone __ia32_compat_sys_x86_clone
-121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname
-122 i386 uname sys_newuname __ia32_sys_newuname
-123 i386 modify_ldt sys_modify_ldt __ia32_sys_modify_ldt
-124 i386 adjtimex sys_adjtimex_time32 __ia32_sys_adjtimex_time32
-125 i386 mprotect sys_mprotect __ia32_sys_mprotect
-126 i386 sigprocmask sys_sigprocmask __ia32_compat_sys_sigprocmask
+113 i386 vm86old sys_vm86old sys_ni_syscall
+114 i386 wait4 sys_wait4 compat_sys_wait4
+115 i386 swapoff sys_swapoff
+116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
+117 i386 ipc sys_ipc compat_sys_ipc
+118 i386 fsync sys_fsync
+119 i386 sigreturn sys_sigreturn compat_sys_sigreturn
+120 i386 clone sys_clone compat_sys_ia32_clone
+121 i386 setdomainname sys_setdomainname
+122 i386 uname sys_newuname
+123 i386 modify_ldt sys_modify_ldt
+124 i386 adjtimex sys_adjtimex_time32
+125 i386 mprotect sys_mprotect
+126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
127 i386 create_module
-128 i386 init_module sys_init_module __ia32_sys_init_module
-129 i386 delete_module sys_delete_module __ia32_sys_delete_module
+128 i386 init_module sys_init_module
+129 i386 delete_module sys_delete_module
130 i386 get_kernel_syms
-131 i386 quotactl sys_quotactl __ia32_compat_sys_quotactl32
-132 i386 getpgid sys_getpgid __ia32_sys_getpgid
-133 i386 fchdir sys_fchdir __ia32_sys_fchdir
-134 i386 bdflush sys_bdflush __ia32_sys_bdflush
-135 i386 sysfs sys_sysfs __ia32_sys_sysfs
-136 i386 personality sys_personality __ia32_sys_personality
+131 i386 quotactl sys_quotactl compat_sys_quotactl32
+132 i386 getpgid sys_getpgid
+133 i386 fchdir sys_fchdir
+134 i386 bdflush sys_bdflush
+135 i386 sysfs sys_sysfs
+136 i386 personality sys_personality
137 i386 afs_syscall
-138 i386 setfsuid sys_setfsuid16 __ia32_sys_setfsuid16
-139 i386 setfsgid sys_setfsgid16 __ia32_sys_setfsgid16
-140 i386 _llseek sys_llseek __ia32_sys_llseek
-141 i386 getdents sys_getdents __ia32_compat_sys_getdents
-142 i386 _newselect sys_select __ia32_compat_sys_select
-143 i386 flock sys_flock __ia32_sys_flock
-144 i386 msync sys_msync __ia32_sys_msync
-145 i386 readv sys_readv __ia32_compat_sys_readv
-146 i386 writev sys_writev __ia32_compat_sys_writev
-147 i386 getsid sys_getsid __ia32_sys_getsid
-148 i386 fdatasync sys_fdatasync __ia32_sys_fdatasync
-149 i386 _sysctl sys_sysctl __ia32_compat_sys_sysctl
-150 i386 mlock sys_mlock __ia32_sys_mlock
-151 i386 munlock sys_munlock __ia32_sys_munlock
-152 i386 mlockall sys_mlockall __ia32_sys_mlockall
-153 i386 munlockall sys_munlockall __ia32_sys_munlockall
-154 i386 sched_setparam sys_sched_setparam __ia32_sys_sched_setparam
-155 i386 sched_getparam sys_sched_getparam __ia32_sys_sched_getparam
-156 i386 sched_setscheduler sys_sched_setscheduler __ia32_sys_sched_setscheduler
-157 i386 sched_getscheduler sys_sched_getscheduler __ia32_sys_sched_getscheduler
-158 i386 sched_yield sys_sched_yield __ia32_sys_sched_yield
-159 i386 sched_get_priority_max sys_sched_get_priority_max __ia32_sys_sched_get_priority_max
-160 i386 sched_get_priority_min sys_sched_get_priority_min __ia32_sys_sched_get_priority_min
-161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32 __ia32_sys_sched_rr_get_interval_time32
-162 i386 nanosleep sys_nanosleep_time32 __ia32_sys_nanosleep_time32
-163 i386 mremap sys_mremap __ia32_sys_mremap
-164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16
-165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16
-166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall
+138 i386 setfsuid sys_setfsuid16
+139 i386 setfsgid sys_setfsgid16
+140 i386 _llseek sys_llseek
+141 i386 getdents sys_getdents compat_sys_getdents
+142 i386 _newselect sys_select compat_sys_select
+143 i386 flock sys_flock
+144 i386 msync sys_msync
+145 i386 readv sys_readv compat_sys_readv
+146 i386 writev sys_writev compat_sys_writev
+147 i386 getsid sys_getsid
+148 i386 fdatasync sys_fdatasync
+149 i386 _sysctl sys_sysctl compat_sys_sysctl
+150 i386 mlock sys_mlock
+151 i386 munlock sys_munlock
+152 i386 mlockall sys_mlockall
+153 i386 munlockall sys_munlockall
+154 i386 sched_setparam sys_sched_setparam
+155 i386 sched_getparam sys_sched_getparam
+156 i386 sched_setscheduler sys_sched_setscheduler
+157 i386 sched_getscheduler sys_sched_getscheduler
+158 i386 sched_yield sys_sched_yield
+159 i386 sched_get_priority_max sys_sched_get_priority_max
+160 i386 sched_get_priority_min sys_sched_get_priority_min
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32
+162 i386 nanosleep sys_nanosleep_time32
+163 i386 mremap sys_mremap
+164 i386 setresuid sys_setresuid16
+165 i386 getresuid sys_getresuid16
+166 i386 vm86 sys_vm86 sys_ni_syscall
167 i386 query_module
-168 i386 poll sys_poll __ia32_sys_poll
+168 i386 poll sys_poll
169 i386 nfsservctl
-170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16
-171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16
-172 i386 prctl sys_prctl __ia32_sys_prctl
-173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn
-174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
-175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
-176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
-177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
-178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
-179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
-181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
-182 i386 chown sys_chown16 __ia32_sys_chown16
-183 i386 getcwd sys_getcwd __ia32_sys_getcwd
-184 i386 capget sys_capget __ia32_sys_capget
-185 i386 capset sys_capset __ia32_sys_capset
-186 i386 sigaltstack sys_sigaltstack __ia32_compat_sys_sigaltstack
-187 i386 sendfile sys_sendfile __ia32_compat_sys_sendfile
+170 i386 setresgid sys_setresgid16
+171 i386 getresgid sys_getresgid16
+172 i386 prctl sys_prctl
+173 i386 rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 i386 rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 i386 rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 i386 pread64 sys_ia32_pread64
+181 i386 pwrite64 sys_ia32_pwrite64
+182 i386 chown sys_chown16
+183 i386 getcwd sys_getcwd
+184 i386 capget sys_capget
+185 i386 capset sys_capset
+186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork __ia32_sys_vfork
-191 i386 ugetrlimit sys_getrlimit __ia32_compat_sys_getrlimit
-192 i386 mmap2 sys_mmap_pgoff __ia32_sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 __ia32_compat_sys_x86_truncate64
-194 i386 ftruncate64 sys_ftruncate64 __ia32_compat_sys_x86_ftruncate64
-195 i386 stat64 sys_stat64 __ia32_compat_sys_x86_stat64
-196 i386 lstat64 sys_lstat64 __ia32_compat_sys_x86_lstat64
-197 i386 fstat64 sys_fstat64 __ia32_compat_sys_x86_fstat64
-198 i386 lchown32 sys_lchown __ia32_sys_lchown
-199 i386 getuid32 sys_getuid __ia32_sys_getuid
-200 i386 getgid32 sys_getgid __ia32_sys_getgid
-201 i386 geteuid32 sys_geteuid __ia32_sys_geteuid
-202 i386 getegid32 sys_getegid __ia32_sys_getegid
-203 i386 setreuid32 sys_setreuid __ia32_sys_setreuid
-204 i386 setregid32 sys_setregid __ia32_sys_setregid
-205 i386 getgroups32 sys_getgroups __ia32_sys_getgroups
-206 i386 setgroups32 sys_setgroups __ia32_sys_setgroups
-207 i386 fchown32 sys_fchown __ia32_sys_fchown
-208 i386 setresuid32 sys_setresuid __ia32_sys_setresuid
-209 i386 getresuid32 sys_getresuid __ia32_sys_getresuid
-210 i386 setresgid32 sys_setresgid __ia32_sys_setresgid
-211 i386 getresgid32 sys_getresgid __ia32_sys_getresgid
-212 i386 chown32 sys_chown __ia32_sys_chown
-213 i386 setuid32 sys_setuid __ia32_sys_setuid
-214 i386 setgid32 sys_setgid __ia32_sys_setgid
-215 i386 setfsuid32 sys_setfsuid __ia32_sys_setfsuid
-216 i386 setfsgid32 sys_setfsgid __ia32_sys_setfsgid
-217 i386 pivot_root sys_pivot_root __ia32_sys_pivot_root
-218 i386 mincore sys_mincore __ia32_sys_mincore
-219 i386 madvise sys_madvise __ia32_sys_madvise
-220 i386 getdents64 sys_getdents64 __ia32_sys_getdents64
-221 i386 fcntl64 sys_fcntl64 __ia32_compat_sys_fcntl64
+190 i386 vfork sys_vfork
+191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
+192 i386 mmap2 sys_mmap_pgoff
+193 i386 truncate64 sys_ia32_truncate64
+194 i386 ftruncate64 sys_ia32_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_ia32_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_ia32_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_ia32_fstat64
+198 i386 lchown32 sys_lchown
+199 i386 getuid32 sys_getuid
+200 i386 getgid32 sys_getgid
+201 i386 geteuid32 sys_geteuid
+202 i386 getegid32 sys_getegid
+203 i386 setreuid32 sys_setreuid
+204 i386 setregid32 sys_setregid
+205 i386 getgroups32 sys_getgroups
+206 i386 setgroups32 sys_setgroups
+207 i386 fchown32 sys_fchown
+208 i386 setresuid32 sys_setresuid
+209 i386 getresuid32 sys_getresuid
+210 i386 setresgid32 sys_setresgid
+211 i386 getresgid32 sys_getresgid
+212 i386 chown32 sys_chown
+213 i386 setuid32 sys_setuid
+214 i386 setgid32 sys_setgid
+215 i386 setfsuid32 sys_setfsuid
+216 i386 setfsgid32 sys_setfsgid
+217 i386 pivot_root sys_pivot_root
+218 i386 mincore sys_mincore
+219 i386 madvise sys_madvise
+220 i386 getdents64 sys_getdents64
+221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
# 222 is unused
# 223 is unused
-224 i386 gettid sys_gettid __ia32_sys_gettid
-225 i386 readahead sys_readahead __ia32_compat_sys_x86_readahead
-226 i386 setxattr sys_setxattr __ia32_sys_setxattr
-227 i386 lsetxattr sys_lsetxattr __ia32_sys_lsetxattr
-228 i386 fsetxattr sys_fsetxattr __ia32_sys_fsetxattr
-229 i386 getxattr sys_getxattr __ia32_sys_getxattr
-230 i386 lgetxattr sys_lgetxattr __ia32_sys_lgetxattr
-231 i386 fgetxattr sys_fgetxattr __ia32_sys_fgetxattr
-232 i386 listxattr sys_listxattr __ia32_sys_listxattr
-233 i386 llistxattr sys_llistxattr __ia32_sys_llistxattr
-234 i386 flistxattr sys_flistxattr __ia32_sys_flistxattr
-235 i386 removexattr sys_removexattr __ia32_sys_removexattr
-236 i386 lremovexattr sys_lremovexattr __ia32_sys_lremovexattr
-237 i386 fremovexattr sys_fremovexattr __ia32_sys_fremovexattr
-238 i386 tkill sys_tkill __ia32_sys_tkill
-239 i386 sendfile64 sys_sendfile64 __ia32_sys_sendfile64
-240 i386 futex sys_futex_time32 __ia32_sys_futex_time32
-241 i386 sched_setaffinity sys_sched_setaffinity __ia32_compat_sys_sched_setaffinity
-242 i386 sched_getaffinity sys_sched_getaffinity __ia32_compat_sys_sched_getaffinity
-243 i386 set_thread_area sys_set_thread_area __ia32_sys_set_thread_area
-244 i386 get_thread_area sys_get_thread_area __ia32_sys_get_thread_area
-245 i386 io_setup sys_io_setup __ia32_compat_sys_io_setup
-246 i386 io_destroy sys_io_destroy __ia32_sys_io_destroy
-247 i386 io_getevents sys_io_getevents_time32 __ia32_sys_io_getevents_time32
-248 i386 io_submit sys_io_submit __ia32_compat_sys_io_submit
-249 i386 io_cancel sys_io_cancel __ia32_sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 __ia32_compat_sys_x86_fadvise64
+224 i386 gettid sys_gettid
+225 i386 readahead sys_ia32_readahead
+226 i386 setxattr sys_setxattr
+227 i386 lsetxattr sys_lsetxattr
+228 i386 fsetxattr sys_fsetxattr
+229 i386 getxattr sys_getxattr
+230 i386 lgetxattr sys_lgetxattr
+231 i386 fgetxattr sys_fgetxattr
+232 i386 listxattr sys_listxattr
+233 i386 llistxattr sys_llistxattr
+234 i386 flistxattr sys_flistxattr
+235 i386 removexattr sys_removexattr
+236 i386 lremovexattr sys_lremovexattr
+237 i386 fremovexattr sys_fremovexattr
+238 i386 tkill sys_tkill
+239 i386 sendfile64 sys_sendfile64
+240 i386 futex sys_futex_time32
+241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+243 i386 set_thread_area sys_set_thread_area
+244 i386 get_thread_area sys_get_thread_area
+245 i386 io_setup sys_io_setup compat_sys_io_setup
+246 i386 io_destroy sys_io_destroy
+247 i386 io_getevents sys_io_getevents_time32
+248 i386 io_submit sys_io_submit compat_sys_io_submit
+249 i386 io_cancel sys_io_cancel
+250 i386 fadvise64 sys_ia32_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
-252 i386 exit_group sys_exit_group __ia32_sys_exit_group
-253 i386 lookup_dcookie sys_lookup_dcookie __ia32_compat_sys_lookup_dcookie
-254 i386 epoll_create sys_epoll_create __ia32_sys_epoll_create
-255 i386 epoll_ctl sys_epoll_ctl __ia32_sys_epoll_ctl
-256 i386 epoll_wait sys_epoll_wait __ia32_sys_epoll_wait
-257 i386 remap_file_pages sys_remap_file_pages __ia32_sys_remap_file_pages
-258 i386 set_tid_address sys_set_tid_address __ia32_sys_set_tid_address
-259 i386 timer_create sys_timer_create __ia32_compat_sys_timer_create
-260 i386 timer_settime sys_timer_settime32 __ia32_sys_timer_settime32
-261 i386 timer_gettime sys_timer_gettime32 __ia32_sys_timer_gettime32
-262 i386 timer_getoverrun sys_timer_getoverrun __ia32_sys_timer_getoverrun
-263 i386 timer_delete sys_timer_delete __ia32_sys_timer_delete
-264 i386 clock_settime sys_clock_settime32 __ia32_sys_clock_settime32
-265 i386 clock_gettime sys_clock_gettime32 __ia32_sys_clock_gettime32
-266 i386 clock_getres sys_clock_getres_time32 __ia32_sys_clock_getres_time32
-267 i386 clock_nanosleep sys_clock_nanosleep_time32 __ia32_sys_clock_nanosleep_time32
-268 i386 statfs64 sys_statfs64 __ia32_compat_sys_statfs64
-269 i386 fstatfs64 sys_fstatfs64 __ia32_compat_sys_fstatfs64
-270 i386 tgkill sys_tgkill __ia32_sys_tgkill
-271 i386 utimes sys_utimes_time32 __ia32_sys_utimes_time32
-272 i386 fadvise64_64 sys_fadvise64_64 __ia32_compat_sys_x86_fadvise64_64
+252 i386 exit_group sys_exit_group
+253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+254 i386 epoll_create sys_epoll_create
+255 i386 epoll_ctl sys_epoll_ctl
+256 i386 epoll_wait sys_epoll_wait
+257 i386 remap_file_pages sys_remap_file_pages
+258 i386 set_tid_address sys_set_tid_address
+259 i386 timer_create sys_timer_create compat_sys_timer_create
+260 i386 timer_settime sys_timer_settime32
+261 i386 timer_gettime sys_timer_gettime32
+262 i386 timer_getoverrun sys_timer_getoverrun
+263 i386 timer_delete sys_timer_delete
+264 i386 clock_settime sys_clock_settime32
+265 i386 clock_gettime sys_clock_gettime32
+266 i386 clock_getres sys_clock_getres_time32
+267 i386 clock_nanosleep sys_clock_nanosleep_time32
+268 i386 statfs64 sys_statfs64 compat_sys_statfs64
+269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+270 i386 tgkill sys_tgkill
+271 i386 utimes sys_utimes_time32
+272 i386 fadvise64_64 sys_ia32_fadvise64_64
273 i386 vserver
-274 i386 mbind sys_mbind __ia32_sys_mbind
-275 i386 get_mempolicy sys_get_mempolicy __ia32_compat_sys_get_mempolicy
-276 i386 set_mempolicy sys_set_mempolicy __ia32_sys_set_mempolicy
-277 i386 mq_open sys_mq_open __ia32_compat_sys_mq_open
-278 i386 mq_unlink sys_mq_unlink __ia32_sys_mq_unlink
-279 i386 mq_timedsend sys_mq_timedsend_time32 __ia32_sys_mq_timedsend_time32
-280 i386 mq_timedreceive sys_mq_timedreceive_time32 __ia32_sys_mq_timedreceive_time32
-281 i386 mq_notify sys_mq_notify __ia32_compat_sys_mq_notify
-282 i386 mq_getsetattr sys_mq_getsetattr __ia32_compat_sys_mq_getsetattr
-283 i386 kexec_load sys_kexec_load __ia32_compat_sys_kexec_load
-284 i386 waitid sys_waitid __ia32_compat_sys_waitid
+274 i386 mbind sys_mbind
+275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+276 i386 set_mempolicy sys_set_mempolicy
+277 i386 mq_open sys_mq_open compat_sys_mq_open
+278 i386 mq_unlink sys_mq_unlink
+279 i386 mq_timedsend sys_mq_timedsend_time32
+280 i386 mq_timedreceive sys_mq_timedreceive_time32
+281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
+282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
+284 i386 waitid sys_waitid compat_sys_waitid
# 285 sys_setaltroot
-286 i386 add_key sys_add_key __ia32_sys_add_key
-287 i386 request_key sys_request_key __ia32_sys_request_key
-288 i386 keyctl sys_keyctl __ia32_compat_sys_keyctl
-289 i386 ioprio_set sys_ioprio_set __ia32_sys_ioprio_set
-290 i386 ioprio_get sys_ioprio_get __ia32_sys_ioprio_get
-291 i386 inotify_init sys_inotify_init __ia32_sys_inotify_init
-292 i386 inotify_add_watch sys_inotify_add_watch __ia32_sys_inotify_add_watch
-293 i386 inotify_rm_watch sys_inotify_rm_watch __ia32_sys_inotify_rm_watch
-294 i386 migrate_pages sys_migrate_pages __ia32_sys_migrate_pages
-295 i386 openat sys_openat __ia32_compat_sys_openat
-296 i386 mkdirat sys_mkdirat __ia32_sys_mkdirat
-297 i386 mknodat sys_mknodat __ia32_sys_mknodat
-298 i386 fchownat sys_fchownat __ia32_sys_fchownat
-299 i386 futimesat sys_futimesat_time32 __ia32_sys_futimesat_time32
-300 i386 fstatat64 sys_fstatat64 __ia32_compat_sys_x86_fstatat
-301 i386 unlinkat sys_unlinkat __ia32_sys_unlinkat
-302 i386 renameat sys_renameat __ia32_sys_renameat
-303 i386 linkat sys_linkat __ia32_sys_linkat
-304 i386 symlinkat sys_symlinkat __ia32_sys_symlinkat
-305 i386 readlinkat sys_readlinkat __ia32_sys_readlinkat
-306 i386 fchmodat sys_fchmodat __ia32_sys_fchmodat
-307 i386 faccessat sys_faccessat __ia32_sys_faccessat
-308 i386 pselect6 sys_pselect6_time32 __ia32_compat_sys_pselect6_time32
-309 i386 ppoll sys_ppoll_time32 __ia32_compat_sys_ppoll_time32
-310 i386 unshare sys_unshare __ia32_sys_unshare
-311 i386 set_robust_list sys_set_robust_list __ia32_compat_sys_set_robust_list
-312 i386 get_robust_list sys_get_robust_list __ia32_compat_sys_get_robust_list
-313 i386 splice sys_splice __ia32_sys_splice
-314 i386 sync_file_range sys_sync_file_range __ia32_compat_sys_x86_sync_file_range
-315 i386 tee sys_tee __ia32_sys_tee
-316 i386 vmsplice sys_vmsplice __ia32_compat_sys_vmsplice
-317 i386 move_pages sys_move_pages __ia32_compat_sys_move_pages
-318 i386 getcpu sys_getcpu __ia32_sys_getcpu
-319 i386 epoll_pwait sys_epoll_pwait __ia32_sys_epoll_pwait
-320 i386 utimensat sys_utimensat_time32 __ia32_sys_utimensat_time32
-321 i386 signalfd sys_signalfd __ia32_compat_sys_signalfd
-322 i386 timerfd_create sys_timerfd_create __ia32_sys_timerfd_create
-323 i386 eventfd sys_eventfd __ia32_sys_eventfd
-324 i386 fallocate sys_fallocate __ia32_compat_sys_x86_fallocate
-325 i386 timerfd_settime sys_timerfd_settime32 __ia32_sys_timerfd_settime32
-326 i386 timerfd_gettime sys_timerfd_gettime32 __ia32_sys_timerfd_gettime32
-327 i386 signalfd4 sys_signalfd4 __ia32_compat_sys_signalfd4
-328 i386 eventfd2 sys_eventfd2 __ia32_sys_eventfd2
-329 i386 epoll_create1 sys_epoll_create1 __ia32_sys_epoll_create1
-330 i386 dup3 sys_dup3 __ia32_sys_dup3
-331 i386 pipe2 sys_pipe2 __ia32_sys_pipe2
-332 i386 inotify_init1 sys_inotify_init1 __ia32_sys_inotify_init1
-333 i386 preadv sys_preadv __ia32_compat_sys_preadv
-334 i386 pwritev sys_pwritev __ia32_compat_sys_pwritev
-335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __ia32_compat_sys_rt_tgsigqueueinfo
-336 i386 perf_event_open sys_perf_event_open __ia32_sys_perf_event_open
-337 i386 recvmmsg sys_recvmmsg_time32 __ia32_compat_sys_recvmmsg_time32
-338 i386 fanotify_init sys_fanotify_init __ia32_sys_fanotify_init
-339 i386 fanotify_mark sys_fanotify_mark __ia32_compat_sys_fanotify_mark
-340 i386 prlimit64 sys_prlimit64 __ia32_sys_prlimit64
-341 i386 name_to_handle_at sys_name_to_handle_at __ia32_sys_name_to_handle_at
-342 i386 open_by_handle_at sys_open_by_handle_at __ia32_compat_sys_open_by_handle_at
-343 i386 clock_adjtime sys_clock_adjtime32 __ia32_sys_clock_adjtime32
-344 i386 syncfs sys_syncfs __ia32_sys_syncfs
-345 i386 sendmmsg sys_sendmmsg __ia32_compat_sys_sendmmsg
-346 i386 setns sys_setns __ia32_sys_setns
-347 i386 process_vm_readv sys_process_vm_readv __ia32_compat_sys_process_vm_readv
-348 i386 process_vm_writev sys_process_vm_writev __ia32_compat_sys_process_vm_writev
-349 i386 kcmp sys_kcmp __ia32_sys_kcmp
-350 i386 finit_module sys_finit_module __ia32_sys_finit_module
-351 i386 sched_setattr sys_sched_setattr __ia32_sys_sched_setattr
-352 i386 sched_getattr sys_sched_getattr __ia32_sys_sched_getattr
-353 i386 renameat2 sys_renameat2 __ia32_sys_renameat2
-354 i386 seccomp sys_seccomp __ia32_sys_seccomp
-355 i386 getrandom sys_getrandom __ia32_sys_getrandom
-356 i386 memfd_create sys_memfd_create __ia32_sys_memfd_create
-357 i386 bpf sys_bpf __ia32_sys_bpf
-358 i386 execveat sys_execveat __ia32_compat_sys_execveat
-359 i386 socket sys_socket __ia32_sys_socket
-360 i386 socketpair sys_socketpair __ia32_sys_socketpair
-361 i386 bind sys_bind __ia32_sys_bind
-362 i386 connect sys_connect __ia32_sys_connect
-363 i386 listen sys_listen __ia32_sys_listen
-364 i386 accept4 sys_accept4 __ia32_sys_accept4
-365 i386 getsockopt sys_getsockopt __ia32_compat_sys_getsockopt
-366 i386 setsockopt sys_setsockopt __ia32_compat_sys_setsockopt
-367 i386 getsockname sys_getsockname __ia32_sys_getsockname
-368 i386 getpeername sys_getpeername __ia32_sys_getpeername
-369 i386 sendto sys_sendto __ia32_sys_sendto
-370 i386 sendmsg sys_sendmsg __ia32_compat_sys_sendmsg
-371 i386 recvfrom sys_recvfrom __ia32_compat_sys_recvfrom
-372 i386 recvmsg sys_recvmsg __ia32_compat_sys_recvmsg
-373 i386 shutdown sys_shutdown __ia32_sys_shutdown
-374 i386 userfaultfd sys_userfaultfd __ia32_sys_userfaultfd
-375 i386 membarrier sys_membarrier __ia32_sys_membarrier
-376 i386 mlock2 sys_mlock2 __ia32_sys_mlock2
-377 i386 copy_file_range sys_copy_file_range __ia32_sys_copy_file_range
-378 i386 preadv2 sys_preadv2 __ia32_compat_sys_preadv2
-379 i386 pwritev2 sys_pwritev2 __ia32_compat_sys_pwritev2
-380 i386 pkey_mprotect sys_pkey_mprotect __ia32_sys_pkey_mprotect
-381 i386 pkey_alloc sys_pkey_alloc __ia32_sys_pkey_alloc
-382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
-383 i386 statx sys_statx __ia32_sys_statx
-384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
-385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
-386 i386 rseq sys_rseq __ia32_sys_rseq
-393 i386 semget sys_semget __ia32_sys_semget
-394 i386 semctl sys_semctl __ia32_compat_sys_semctl
-395 i386 shmget sys_shmget __ia32_sys_shmget
-396 i386 shmctl sys_shmctl __ia32_compat_sys_shmctl
-397 i386 shmat sys_shmat __ia32_compat_sys_shmat
-398 i386 shmdt sys_shmdt __ia32_sys_shmdt
-399 i386 msgget sys_msgget __ia32_sys_msgget
-400 i386 msgsnd sys_msgsnd __ia32_compat_sys_msgsnd
-401 i386 msgrcv sys_msgrcv __ia32_compat_sys_msgrcv
-402 i386 msgctl sys_msgctl __ia32_compat_sys_msgctl
-403 i386 clock_gettime64 sys_clock_gettime __ia32_sys_clock_gettime
-404 i386 clock_settime64 sys_clock_settime __ia32_sys_clock_settime
-405 i386 clock_adjtime64 sys_clock_adjtime __ia32_sys_clock_adjtime
-406 i386 clock_getres_time64 sys_clock_getres __ia32_sys_clock_getres
-407 i386 clock_nanosleep_time64 sys_clock_nanosleep __ia32_sys_clock_nanosleep
-408 i386 timer_gettime64 sys_timer_gettime __ia32_sys_timer_gettime
-409 i386 timer_settime64 sys_timer_settime __ia32_sys_timer_settime
-410 i386 timerfd_gettime64 sys_timerfd_gettime __ia32_sys_timerfd_gettime
-411 i386 timerfd_settime64 sys_timerfd_settime __ia32_sys_timerfd_settime
-412 i386 utimensat_time64 sys_utimensat __ia32_sys_utimensat
-413 i386 pselect6_time64 sys_pselect6 __ia32_compat_sys_pselect6_time64
-414 i386 ppoll_time64 sys_ppoll __ia32_compat_sys_ppoll_time64
-416 i386 io_pgetevents_time64 sys_io_pgetevents __ia32_sys_io_pgetevents
-417 i386 recvmmsg_time64 sys_recvmmsg __ia32_compat_sys_recvmmsg_time64
-418 i386 mq_timedsend_time64 sys_mq_timedsend __ia32_sys_mq_timedsend
-419 i386 mq_timedreceive_time64 sys_mq_timedreceive __ia32_sys_mq_timedreceive
-420 i386 semtimedop_time64 sys_semtimedop __ia32_sys_semtimedop
-421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time64
-422 i386 futex_time64 sys_futex __ia32_sys_futex
-423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval
-424 i386 pidfd_send_signal sys_pidfd_send_signal __ia32_sys_pidfd_send_signal
-425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
-426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
-427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
-428 i386 open_tree sys_open_tree __ia32_sys_open_tree
-429 i386 move_mount sys_move_mount __ia32_sys_move_mount
-430 i386 fsopen sys_fsopen __ia32_sys_fsopen
-431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
-432 i386 fsmount sys_fsmount __ia32_sys_fsmount
-433 i386 fspick sys_fspick __ia32_sys_fspick
-434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
-435 i386 clone3 sys_clone3 __ia32_sys_clone3
-437 i386 openat2 sys_openat2 __ia32_sys_openat2
-438 i386 pidfd_getfd sys_pidfd_getfd __ia32_sys_pidfd_getfd
+286 i386 add_key sys_add_key
+287 i386 request_key sys_request_key
+288 i386 keyctl sys_keyctl compat_sys_keyctl
+289 i386 ioprio_set sys_ioprio_set
+290 i386 ioprio_get sys_ioprio_get
+291 i386 inotify_init sys_inotify_init
+292 i386 inotify_add_watch sys_inotify_add_watch
+293 i386 inotify_rm_watch sys_inotify_rm_watch
+294 i386 migrate_pages sys_migrate_pages
+295 i386 openat sys_openat compat_sys_openat
+296 i386 mkdirat sys_mkdirat
+297 i386 mknodat sys_mknodat
+298 i386 fchownat sys_fchownat
+299 i386 futimesat sys_futimesat_time32
+300 i386 fstatat64 sys_fstatat64 compat_sys_ia32_fstatat64
+301 i386 unlinkat sys_unlinkat
+302 i386 renameat sys_renameat
+303 i386 linkat sys_linkat
+304 i386 symlinkat sys_symlinkat
+305 i386 readlinkat sys_readlinkat
+306 i386 fchmodat sys_fchmodat
+307 i386 faccessat sys_faccessat
+308 i386 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+309 i386 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+310 i386 unshare sys_unshare
+311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
+312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
+313 i386 splice sys_splice
+314 i386 sync_file_range sys_ia32_sync_file_range
+315 i386 tee sys_tee
+316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
+317 i386 move_pages sys_move_pages compat_sys_move_pages
+318 i386 getcpu sys_getcpu
+319 i386 epoll_pwait sys_epoll_pwait
+320 i386 utimensat sys_utimensat_time32
+321 i386 signalfd sys_signalfd compat_sys_signalfd
+322 i386 timerfd_create sys_timerfd_create
+323 i386 eventfd sys_eventfd
+324 i386 fallocate sys_ia32_fallocate
+325 i386 timerfd_settime sys_timerfd_settime32
+326 i386 timerfd_gettime sys_timerfd_gettime32
+327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
+328 i386 eventfd2 sys_eventfd2
+329 i386 epoll_create1 sys_epoll_create1
+330 i386 dup3 sys_dup3
+331 i386 pipe2 sys_pipe2
+332 i386 inotify_init1 sys_inotify_init1
+333 i386 preadv sys_preadv compat_sys_preadv
+334 i386 pwritev sys_pwritev compat_sys_pwritev
+335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+336 i386 perf_event_open sys_perf_event_open
+337 i386 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+338 i386 fanotify_init sys_fanotify_init
+339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+340 i386 prlimit64 sys_prlimit64
+341 i386 name_to_handle_at sys_name_to_handle_at
+342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+343 i386 clock_adjtime sys_clock_adjtime32
+344 i386 syncfs sys_syncfs
+345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
+346 i386 setns sys_setns
+347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+349 i386 kcmp sys_kcmp
+350 i386 finit_module sys_finit_module
+351 i386 sched_setattr sys_sched_setattr
+352 i386 sched_getattr sys_sched_getattr
+353 i386 renameat2 sys_renameat2
+354 i386 seccomp sys_seccomp
+355 i386 getrandom sys_getrandom
+356 i386 memfd_create sys_memfd_create
+357 i386 bpf sys_bpf
+358 i386 execveat sys_execveat compat_sys_execveat
+359 i386 socket sys_socket
+360 i386 socketpair sys_socketpair
+361 i386 bind sys_bind
+362 i386 connect sys_connect
+363 i386 listen sys_listen
+364 i386 accept4 sys_accept4
+365 i386 getsockopt sys_getsockopt compat_sys_getsockopt
+366 i386 setsockopt sys_setsockopt compat_sys_setsockopt
+367 i386 getsockname sys_getsockname
+368 i386 getpeername sys_getpeername
+369 i386 sendto sys_sendto
+370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
+371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
+372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
+373 i386 shutdown sys_shutdown
+374 i386 userfaultfd sys_userfaultfd
+375 i386 membarrier sys_membarrier
+376 i386 mlock2 sys_mlock2
+377 i386 copy_file_range sys_copy_file_range
+378 i386 preadv2 sys_preadv2 compat_sys_preadv2
+379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
+380 i386 pkey_mprotect sys_pkey_mprotect
+381 i386 pkey_alloc sys_pkey_alloc
+382 i386 pkey_free sys_pkey_free
+383 i386 statx sys_statx
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+386 i386 rseq sys_rseq
+393 i386 semget sys_semget
+394 i386 semctl sys_semctl compat_sys_semctl
+395 i386 shmget sys_shmget
+396 i386 shmctl sys_shmctl compat_sys_shmctl
+397 i386 shmat sys_shmat compat_sys_shmat
+398 i386 shmdt sys_shmdt
+399 i386 msgget sys_msgget
+400 i386 msgsnd sys_msgsnd compat_sys_msgsnd
+401 i386 msgrcv sys_msgrcv compat_sys_msgrcv
+402 i386 msgctl sys_msgctl compat_sys_msgctl
+403 i386 clock_gettime64 sys_clock_gettime
+404 i386 clock_settime64 sys_clock_settime
+405 i386 clock_adjtime64 sys_clock_adjtime
+406 i386 clock_getres_time64 sys_clock_getres
+407 i386 clock_nanosleep_time64 sys_clock_nanosleep
+408 i386 timer_gettime64 sys_timer_gettime
+409 i386 timer_settime64 sys_timer_settime
+410 i386 timerfd_gettime64 sys_timerfd_gettime
+411 i386 timerfd_settime64 sys_timerfd_settime
+412 i386 utimensat_time64 sys_utimensat
+413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 i386 io_pgetevents_time64 sys_io_pgetevents
+417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 i386 mq_timedsend_time64 sys_mq_timedsend
+419 i386 mq_timedreceive_time64 sys_mq_timedreceive
+420 i386 semtimedop_time64 sys_semtimedop
+421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 i386 futex_time64 sys_futex
+423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 i386 pidfd_send_signal sys_pidfd_send_signal
+425 i386 io_uring_setup sys_io_uring_setup
+426 i386 io_uring_enter sys_io_uring_enter
+427 i386 io_uring_register sys_io_uring_register
+428 i386 open_tree sys_open_tree
+429 i386 move_mount sys_move_mount
+430 i386 fsopen sys_fsopen
+431 i386 fsconfig sys_fsconfig
+432 i386 fsmount sys_fsmount
+433 i386 fspick sys_fspick
+434 i386 pidfd_open sys_pidfd_open
+435 i386 clone3 sys_clone3
+437 i386 openat2 sys_openat2
+438 i386 pidfd_getfd sys_pidfd_getfd
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 44d510bc9b78..37b844f839bc 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -8,357 +8,357 @@
#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read __x64_sys_read
-1 common write __x64_sys_write
-2 common open __x64_sys_open
-3 common close __x64_sys_close
-4 common stat __x64_sys_newstat
-5 common fstat __x64_sys_newfstat
-6 common lstat __x64_sys_newlstat
-7 common poll __x64_sys_poll
-8 common lseek __x64_sys_lseek
-9 common mmap __x64_sys_mmap
-10 common mprotect __x64_sys_mprotect
-11 common munmap __x64_sys_munmap
-12 common brk __x64_sys_brk
-13 64 rt_sigaction __x64_sys_rt_sigaction
-14 common rt_sigprocmask __x64_sys_rt_sigprocmask
-15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
-16 64 ioctl __x64_sys_ioctl
-17 common pread64 __x64_sys_pread64
-18 common pwrite64 __x64_sys_pwrite64
-19 64 readv __x64_sys_readv
-20 64 writev __x64_sys_writev
-21 common access __x64_sys_access
-22 common pipe __x64_sys_pipe
-23 common select __x64_sys_select
-24 common sched_yield __x64_sys_sched_yield
-25 common mremap __x64_sys_mremap
-26 common msync __x64_sys_msync
-27 common mincore __x64_sys_mincore
-28 common madvise __x64_sys_madvise
-29 common shmget __x64_sys_shmget
-30 common shmat __x64_sys_shmat
-31 common shmctl __x64_sys_shmctl
-32 common dup __x64_sys_dup
-33 common dup2 __x64_sys_dup2
-34 common pause __x64_sys_pause
-35 common nanosleep __x64_sys_nanosleep
-36 common getitimer __x64_sys_getitimer
-37 common alarm __x64_sys_alarm
-38 common setitimer __x64_sys_setitimer
-39 common getpid __x64_sys_getpid
-40 common sendfile __x64_sys_sendfile64
-41 common socket __x64_sys_socket
-42 common connect __x64_sys_connect
-43 common accept __x64_sys_accept
-44 common sendto __x64_sys_sendto
-45 64 recvfrom __x64_sys_recvfrom
-46 64 sendmsg __x64_sys_sendmsg
-47 64 recvmsg __x64_sys_recvmsg
-48 common shutdown __x64_sys_shutdown
-49 common bind __x64_sys_bind
-50 common listen __x64_sys_listen
-51 common getsockname __x64_sys_getsockname
-52 common getpeername __x64_sys_getpeername
-53 common socketpair __x64_sys_socketpair
-54 64 setsockopt __x64_sys_setsockopt
-55 64 getsockopt __x64_sys_getsockopt
-56 common clone __x64_sys_clone/ptregs
-57 common fork __x64_sys_fork/ptregs
-58 common vfork __x64_sys_vfork/ptregs
-59 64 execve __x64_sys_execve/ptregs
-60 common exit __x64_sys_exit
-61 common wait4 __x64_sys_wait4
-62 common kill __x64_sys_kill
-63 common uname __x64_sys_newuname
-64 common semget __x64_sys_semget
-65 common semop __x64_sys_semop
-66 common semctl __x64_sys_semctl
-67 common shmdt __x64_sys_shmdt
-68 common msgget __x64_sys_msgget
-69 common msgsnd __x64_sys_msgsnd
-70 common msgrcv __x64_sys_msgrcv
-71 common msgctl __x64_sys_msgctl
-72 common fcntl __x64_sys_fcntl
-73 common flock __x64_sys_flock
-74 common fsync __x64_sys_fsync
-75 common fdatasync __x64_sys_fdatasync
-76 common truncate __x64_sys_truncate
-77 common ftruncate __x64_sys_ftruncate
-78 common getdents __x64_sys_getdents
-79 common getcwd __x64_sys_getcwd
-80 common chdir __x64_sys_chdir
-81 common fchdir __x64_sys_fchdir
-82 common rename __x64_sys_rename
-83 common mkdir __x64_sys_mkdir
-84 common rmdir __x64_sys_rmdir
-85 common creat __x64_sys_creat
-86 common link __x64_sys_link
-87 common unlink __x64_sys_unlink
-88 common symlink __x64_sys_symlink
-89 common readlink __x64_sys_readlink
-90 common chmod __x64_sys_chmod
-91 common fchmod __x64_sys_fchmod
-92 common chown __x64_sys_chown
-93 common fchown __x64_sys_fchown
-94 common lchown __x64_sys_lchown
-95 common umask __x64_sys_umask
-96 common gettimeofday __x64_sys_gettimeofday
-97 common getrlimit __x64_sys_getrlimit
-98 common getrusage __x64_sys_getrusage
-99 common sysinfo __x64_sys_sysinfo
-100 common times __x64_sys_times
-101 64 ptrace __x64_sys_ptrace
-102 common getuid __x64_sys_getuid
-103 common syslog __x64_sys_syslog
-104 common getgid __x64_sys_getgid
-105 common setuid __x64_sys_setuid
-106 common setgid __x64_sys_setgid
-107 common geteuid __x64_sys_geteuid
-108 common getegid __x64_sys_getegid
-109 common setpgid __x64_sys_setpgid
-110 common getppid __x64_sys_getppid
-111 common getpgrp __x64_sys_getpgrp
-112 common setsid __x64_sys_setsid
-113 common setreuid __x64_sys_setreuid
-114 common setregid __x64_sys_setregid
-115 common getgroups __x64_sys_getgroups
-116 common setgroups __x64_sys_setgroups
-117 common setresuid __x64_sys_setresuid
-118 common getresuid __x64_sys_getresuid
-119 common setresgid __x64_sys_setresgid
-120 common getresgid __x64_sys_getresgid
-121 common getpgid __x64_sys_getpgid
-122 common setfsuid __x64_sys_setfsuid
-123 common setfsgid __x64_sys_setfsgid
-124 common getsid __x64_sys_getsid
-125 common capget __x64_sys_capget
-126 common capset __x64_sys_capset
-127 64 rt_sigpending __x64_sys_rt_sigpending
-128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
-130 common rt_sigsuspend __x64_sys_rt_sigsuspend
-131 64 sigaltstack __x64_sys_sigaltstack
-132 common utime __x64_sys_utime
-133 common mknod __x64_sys_mknod
+0 common read sys_read
+1 common write sys_write
+2 common open sys_open
+3 common close sys_close
+4 common stat sys_newstat
+5 common fstat sys_newfstat
+6 common lstat sys_newlstat
+7 common poll sys_poll
+8 common lseek sys_lseek
+9 common mmap sys_mmap
+10 common mprotect sys_mprotect
+11 common munmap sys_munmap
+12 common brk sys_brk
+13 64 rt_sigaction sys_rt_sigaction
+14 common rt_sigprocmask sys_rt_sigprocmask
+15 64 rt_sigreturn sys_rt_sigreturn
+16 64 ioctl sys_ioctl
+17 common pread64 sys_pread64
+18 common pwrite64 sys_pwrite64
+19 64 readv sys_readv
+20 64 writev sys_writev
+21 common access sys_access
+22 common pipe sys_pipe
+23 common select sys_select
+24 common sched_yield sys_sched_yield
+25 common mremap sys_mremap
+26 common msync sys_msync
+27 common mincore sys_mincore
+28 common madvise sys_madvise
+29 common shmget sys_shmget
+30 common shmat sys_shmat
+31 common shmctl sys_shmctl
+32 common dup sys_dup
+33 common dup2 sys_dup2
+34 common pause sys_pause
+35 common nanosleep sys_nanosleep
+36 common getitimer sys_getitimer
+37 common alarm sys_alarm
+38 common setitimer sys_setitimer
+39 common getpid sys_getpid
+40 common sendfile sys_sendfile64
+41 common socket sys_socket
+42 common connect sys_connect
+43 common accept sys_accept
+44 common sendto sys_sendto
+45 64 recvfrom sys_recvfrom
+46 64 sendmsg sys_sendmsg
+47 64 recvmsg sys_recvmsg
+48 common shutdown sys_shutdown
+49 common bind sys_bind
+50 common listen sys_listen
+51 common getsockname sys_getsockname
+52 common getpeername sys_getpeername
+53 common socketpair sys_socketpair
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
+56 common clone sys_clone
+57 common fork sys_fork
+58 common vfork sys_vfork
+59 64 execve sys_execve
+60 common exit sys_exit
+61 common wait4 sys_wait4
+62 common kill sys_kill
+63 common uname sys_newuname
+64 common semget sys_semget
+65 common semop sys_semop
+66 common semctl sys_semctl
+67 common shmdt sys_shmdt
+68 common msgget sys_msgget
+69 common msgsnd sys_msgsnd
+70 common msgrcv sys_msgrcv
+71 common msgctl sys_msgctl
+72 common fcntl sys_fcntl
+73 common flock sys_flock
+74 common fsync sys_fsync
+75 common fdatasync sys_fdatasync
+76 common truncate sys_truncate
+77 common ftruncate sys_ftruncate
+78 common getdents sys_getdents
+79 common getcwd sys_getcwd
+80 common chdir sys_chdir
+81 common fchdir sys_fchdir
+82 common rename sys_rename
+83 common mkdir sys_mkdir
+84 common rmdir sys_rmdir
+85 common creat sys_creat
+86 common link sys_link
+87 common unlink sys_unlink
+88 common symlink sys_symlink
+89 common readlink sys_readlink
+90 common chmod sys_chmod
+91 common fchmod sys_fchmod
+92 common chown sys_chown
+93 common fchown sys_fchown
+94 common lchown sys_lchown
+95 common umask sys_umask
+96 common gettimeofday sys_gettimeofday
+97 common getrlimit sys_getrlimit
+98 common getrusage sys_getrusage
+99 common sysinfo sys_sysinfo
+100 common times sys_times
+101 64 ptrace sys_ptrace
+102 common getuid sys_getuid
+103 common syslog sys_syslog
+104 common getgid sys_getgid
+105 common setuid sys_setuid
+106 common setgid sys_setgid
+107 common geteuid sys_geteuid
+108 common getegid sys_getegid
+109 common setpgid sys_setpgid
+110 common getppid sys_getppid
+111 common getpgrp sys_getpgrp
+112 common setsid sys_setsid
+113 common setreuid sys_setreuid
+114 common setregid sys_setregid
+115 common getgroups sys_getgroups
+116 common setgroups sys_setgroups
+117 common setresuid sys_setresuid
+118 common getresuid sys_getresuid
+119 common setresgid sys_setresgid
+120 common getresgid sys_getresgid
+121 common getpgid sys_getpgid
+122 common setfsuid sys_setfsuid
+123 common setfsgid sys_setfsgid
+124 common getsid sys_getsid
+125 common capget sys_capget
+126 common capset sys_capset
+127 64 rt_sigpending sys_rt_sigpending
+128 64 rt_sigtimedwait sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
+130 common rt_sigsuspend sys_rt_sigsuspend
+131 64 sigaltstack sys_sigaltstack
+132 common utime sys_utime
+133 common mknod sys_mknod
134 64 uselib
-135 common personality __x64_sys_personality
-136 common ustat __x64_sys_ustat
-137 common statfs __x64_sys_statfs
-138 common fstatfs __x64_sys_fstatfs
-139 common sysfs __x64_sys_sysfs
-140 common getpriority __x64_sys_getpriority
-141 common setpriority __x64_sys_setpriority
-142 common sched_setparam __x64_sys_sched_setparam
-143 common sched_getparam __x64_sys_sched_getparam
-144 common sched_setscheduler __x64_sys_sched_setscheduler
-145 common sched_getscheduler __x64_sys_sched_getscheduler
-146 common sched_get_priority_max __x64_sys_sched_get_priority_max
-147 common sched_get_priority_min __x64_sys_sched_get_priority_min
-148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
-149 common mlock __x64_sys_mlock
-150 common munlock __x64_sys_munlock
-151 common mlockall __x64_sys_mlockall
-152 common munlockall __x64_sys_munlockall
-153 common vhangup __x64_sys_vhangup
-154 common modify_ldt __x64_sys_modify_ldt
-155 common pivot_root __x64_sys_pivot_root
-156 64 _sysctl __x64_sys_sysctl
-157 common prctl __x64_sys_prctl
-158 common arch_prctl __x64_sys_arch_prctl
-159 common adjtimex __x64_sys_adjtimex
-160 common setrlimit __x64_sys_setrlimit
-161 common chroot __x64_sys_chroot
-162 common sync __x64_sys_sync
-163 common acct __x64_sys_acct
-164 common settimeofday __x64_sys_settimeofday
-165 common mount __x64_sys_mount
-166 common umount2 __x64_sys_umount
-167 common swapon __x64_sys_swapon
-168 common swapoff __x64_sys_swapoff
-169 common reboot __x64_sys_reboot
-170 common sethostname __x64_sys_sethostname
-171 common setdomainname __x64_sys_setdomainname
-172 common iopl __x64_sys_iopl/ptregs
-173 common ioperm __x64_sys_ioperm
+135 common personality sys_personality
+136 common ustat sys_ustat
+137 common statfs sys_statfs
+138 common fstatfs sys_fstatfs
+139 common sysfs sys_sysfs
+140 common getpriority sys_getpriority
+141 common setpriority sys_setpriority
+142 common sched_setparam sys_sched_setparam
+143 common sched_getparam sys_sched_getparam
+144 common sched_setscheduler sys_sched_setscheduler
+145 common sched_getscheduler sys_sched_getscheduler
+146 common sched_get_priority_max sys_sched_get_priority_max
+147 common sched_get_priority_min sys_sched_get_priority_min
+148 common sched_rr_get_interval sys_sched_rr_get_interval
+149 common mlock sys_mlock
+150 common munlock sys_munlock
+151 common mlockall sys_mlockall
+152 common munlockall sys_munlockall
+153 common vhangup sys_vhangup
+154 common modify_ldt sys_modify_ldt
+155 common pivot_root sys_pivot_root
+156 64 _sysctl sys_sysctl
+157 common prctl sys_prctl
+158 common arch_prctl sys_arch_prctl
+159 common adjtimex sys_adjtimex
+160 common setrlimit sys_setrlimit
+161 common chroot sys_chroot
+162 common sync sys_sync
+163 common acct sys_acct
+164 common settimeofday sys_settimeofday
+165 common mount sys_mount
+166 common umount2 sys_umount
+167 common swapon sys_swapon
+168 common swapoff sys_swapoff
+169 common reboot sys_reboot
+170 common sethostname sys_sethostname
+171 common setdomainname sys_setdomainname
+172 common iopl sys_iopl
+173 common ioperm sys_ioperm
174 64 create_module
-175 common init_module __x64_sys_init_module
-176 common delete_module __x64_sys_delete_module
+175 common init_module sys_init_module
+176 common delete_module sys_delete_module
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl __x64_sys_quotactl
+179 common quotactl sys_quotactl
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid __x64_sys_gettid
-187 common readahead __x64_sys_readahead
-188 common setxattr __x64_sys_setxattr
-189 common lsetxattr __x64_sys_lsetxattr
-190 common fsetxattr __x64_sys_fsetxattr
-191 common getxattr __x64_sys_getxattr
-192 common lgetxattr __x64_sys_lgetxattr
-193 common fgetxattr __x64_sys_fgetxattr
-194 common listxattr __x64_sys_listxattr
-195 common llistxattr __x64_sys_llistxattr
-196 common flistxattr __x64_sys_flistxattr
-197 common removexattr __x64_sys_removexattr
-198 common lremovexattr __x64_sys_lremovexattr
-199 common fremovexattr __x64_sys_fremovexattr
-200 common tkill __x64_sys_tkill
-201 common time __x64_sys_time
-202 common futex __x64_sys_futex
-203 common sched_setaffinity __x64_sys_sched_setaffinity
-204 common sched_getaffinity __x64_sys_sched_getaffinity
+186 common gettid sys_gettid
+187 common readahead sys_readahead
+188 common setxattr sys_setxattr
+189 common lsetxattr sys_lsetxattr
+190 common fsetxattr sys_fsetxattr
+191 common getxattr sys_getxattr
+192 common lgetxattr sys_lgetxattr
+193 common fgetxattr sys_fgetxattr
+194 common listxattr sys_listxattr
+195 common llistxattr sys_llistxattr
+196 common flistxattr sys_flistxattr
+197 common removexattr sys_removexattr
+198 common lremovexattr sys_lremovexattr
+199 common fremovexattr sys_fremovexattr
+200 common tkill sys_tkill
+201 common time sys_time
+202 common futex sys_futex
+203 common sched_setaffinity sys_sched_setaffinity
+204 common sched_getaffinity sys_sched_getaffinity
205 64 set_thread_area
-206 64 io_setup __x64_sys_io_setup
-207 common io_destroy __x64_sys_io_destroy
-208 common io_getevents __x64_sys_io_getevents
-209 64 io_submit __x64_sys_io_submit
-210 common io_cancel __x64_sys_io_cancel
+206 64 io_setup sys_io_setup
+207 common io_destroy sys_io_destroy
+208 common io_getevents sys_io_getevents
+209 64 io_submit sys_io_submit
+210 common io_cancel sys_io_cancel
211 64 get_thread_area
-212 common lookup_dcookie __x64_sys_lookup_dcookie
-213 common epoll_create __x64_sys_epoll_create
+212 common lookup_dcookie sys_lookup_dcookie
+213 common epoll_create sys_epoll_create
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages __x64_sys_remap_file_pages
-217 common getdents64 __x64_sys_getdents64
-218 common set_tid_address __x64_sys_set_tid_address
-219 common restart_syscall __x64_sys_restart_syscall
-220 common semtimedop __x64_sys_semtimedop
-221 common fadvise64 __x64_sys_fadvise64
-222 64 timer_create __x64_sys_timer_create
-223 common timer_settime __x64_sys_timer_settime
-224 common timer_gettime __x64_sys_timer_gettime
-225 common timer_getoverrun __x64_sys_timer_getoverrun
-226 common timer_delete __x64_sys_timer_delete
-227 common clock_settime __x64_sys_clock_settime
-228 common clock_gettime __x64_sys_clock_gettime
-229 common clock_getres __x64_sys_clock_getres
-230 common clock_nanosleep __x64_sys_clock_nanosleep
-231 common exit_group __x64_sys_exit_group
-232 common epoll_wait __x64_sys_epoll_wait
-233 common epoll_ctl __x64_sys_epoll_ctl
-234 common tgkill __x64_sys_tgkill
-235 common utimes __x64_sys_utimes
+216 common remap_file_pages sys_remap_file_pages
+217 common getdents64 sys_getdents64
+218 common set_tid_address sys_set_tid_address
+219 common restart_syscall sys_restart_syscall
+220 common semtimedop sys_semtimedop
+221 common fadvise64 sys_fadvise64
+222 64 timer_create sys_timer_create
+223 common timer_settime sys_timer_settime
+224 common timer_gettime sys_timer_gettime
+225 common timer_getoverrun sys_timer_getoverrun
+226 common timer_delete sys_timer_delete
+227 common clock_settime sys_clock_settime
+228 common clock_gettime sys_clock_gettime
+229 common clock_getres sys_clock_getres
+230 common clock_nanosleep sys_clock_nanosleep
+231 common exit_group sys_exit_group
+232 common epoll_wait sys_epoll_wait
+233 common epoll_ctl sys_epoll_ctl
+234 common tgkill sys_tgkill
+235 common utimes sys_utimes
236 64 vserver
-237 common mbind __x64_sys_mbind
-238 common set_mempolicy __x64_sys_set_mempolicy
-239 common get_mempolicy __x64_sys_get_mempolicy
-240 common mq_open __x64_sys_mq_open
-241 common mq_unlink __x64_sys_mq_unlink
-242 common mq_timedsend __x64_sys_mq_timedsend
-243 common mq_timedreceive __x64_sys_mq_timedreceive
-244 64 mq_notify __x64_sys_mq_notify
-245 common mq_getsetattr __x64_sys_mq_getsetattr
-246 64 kexec_load __x64_sys_kexec_load
-247 64 waitid __x64_sys_waitid
-248 common add_key __x64_sys_add_key
-249 common request_key __x64_sys_request_key
-250 common keyctl __x64_sys_keyctl
-251 common ioprio_set __x64_sys_ioprio_set
-252 common ioprio_get __x64_sys_ioprio_get
-253 common inotify_init __x64_sys_inotify_init
-254 common inotify_add_watch __x64_sys_inotify_add_watch
-255 common inotify_rm_watch __x64_sys_inotify_rm_watch
-256 common migrate_pages __x64_sys_migrate_pages
-257 common openat __x64_sys_openat
-258 common mkdirat __x64_sys_mkdirat
-259 common mknodat __x64_sys_mknodat
-260 common fchownat __x64_sys_fchownat
-261 common futimesat __x64_sys_futimesat
-262 common newfstatat __x64_sys_newfstatat
-263 common unlinkat __x64_sys_unlinkat
-264 common renameat __x64_sys_renameat
-265 common linkat __x64_sys_linkat
-266 common symlinkat __x64_sys_symlinkat
-267 common readlinkat __x64_sys_readlinkat
-268 common fchmodat __x64_sys_fchmodat
-269 common faccessat __x64_sys_faccessat
-270 common pselect6 __x64_sys_pselect6
-271 common ppoll __x64_sys_ppoll
-272 common unshare __x64_sys_unshare
-273 64 set_robust_list __x64_sys_set_robust_list
-274 64 get_robust_list __x64_sys_get_robust_list
-275 common splice __x64_sys_splice
-276 common tee __x64_sys_tee
-277 common sync_file_range __x64_sys_sync_file_range
-278 64 vmsplice __x64_sys_vmsplice
-279 64 move_pages __x64_sys_move_pages
-280 common utimensat __x64_sys_utimensat
-281 common epoll_pwait __x64_sys_epoll_pwait
-282 common signalfd __x64_sys_signalfd
-283 common timerfd_create __x64_sys_timerfd_create
-284 common eventfd __x64_sys_eventfd
-285 common fallocate __x64_sys_fallocate
-286 common timerfd_settime __x64_sys_timerfd_settime
-287 common timerfd_gettime __x64_sys_timerfd_gettime
-288 common accept4 __x64_sys_accept4
-289 common signalfd4 __x64_sys_signalfd4
-290 common eventfd2 __x64_sys_eventfd2
-291 common epoll_create1 __x64_sys_epoll_create1
-292 common dup3 __x64_sys_dup3
-293 common pipe2 __x64_sys_pipe2
-294 common inotify_init1 __x64_sys_inotify_init1
-295 64 preadv __x64_sys_preadv
-296 64 pwritev __x64_sys_pwritev
-297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
-298 common perf_event_open __x64_sys_perf_event_open
-299 64 recvmmsg __x64_sys_recvmmsg
-300 common fanotify_init __x64_sys_fanotify_init
-301 common fanotify_mark __x64_sys_fanotify_mark
-302 common prlimit64 __x64_sys_prlimit64
-303 common name_to_handle_at __x64_sys_name_to_handle_at
-304 common open_by_handle_at __x64_sys_open_by_handle_at
-305 common clock_adjtime __x64_sys_clock_adjtime
-306 common syncfs __x64_sys_syncfs
-307 64 sendmmsg __x64_sys_sendmmsg
-308 common setns __x64_sys_setns
-309 common getcpu __x64_sys_getcpu
-310 64 process_vm_readv __x64_sys_process_vm_readv
-311 64 process_vm_writev __x64_sys_process_vm_writev
-312 common kcmp __x64_sys_kcmp
-313 common finit_module __x64_sys_finit_module
-314 common sched_setattr __x64_sys_sched_setattr
-315 common sched_getattr __x64_sys_sched_getattr
-316 common renameat2 __x64_sys_renameat2
-317 common seccomp __x64_sys_seccomp
-318 common getrandom __x64_sys_getrandom
-319 common memfd_create __x64_sys_memfd_create
-320 common kexec_file_load __x64_sys_kexec_file_load
-321 common bpf __x64_sys_bpf
-322 64 execveat __x64_sys_execveat/ptregs
-323 common userfaultfd __x64_sys_userfaultfd
-324 common membarrier __x64_sys_membarrier
-325 common mlock2 __x64_sys_mlock2
-326 common copy_file_range __x64_sys_copy_file_range
-327 64 preadv2 __x64_sys_preadv2
-328 64 pwritev2 __x64_sys_pwritev2
-329 common pkey_mprotect __x64_sys_pkey_mprotect
-330 common pkey_alloc __x64_sys_pkey_alloc
-331 common pkey_free __x64_sys_pkey_free
-332 common statx __x64_sys_statx
-333 common io_pgetevents __x64_sys_io_pgetevents
-334 common rseq __x64_sys_rseq
+237 common mbind sys_mbind
+238 common set_mempolicy sys_set_mempolicy
+239 common get_mempolicy sys_get_mempolicy
+240 common mq_open sys_mq_open
+241 common mq_unlink sys_mq_unlink
+242 common mq_timedsend sys_mq_timedsend
+243 common mq_timedreceive sys_mq_timedreceive
+244 64 mq_notify sys_mq_notify
+245 common mq_getsetattr sys_mq_getsetattr
+246 64 kexec_load sys_kexec_load
+247 64 waitid sys_waitid
+248 common add_key sys_add_key
+249 common request_key sys_request_key
+250 common keyctl sys_keyctl
+251 common ioprio_set sys_ioprio_set
+252 common ioprio_get sys_ioprio_get
+253 common inotify_init sys_inotify_init
+254 common inotify_add_watch sys_inotify_add_watch
+255 common inotify_rm_watch sys_inotify_rm_watch
+256 common migrate_pages sys_migrate_pages
+257 common openat sys_openat
+258 common mkdirat sys_mkdirat
+259 common mknodat sys_mknodat
+260 common fchownat sys_fchownat
+261 common futimesat sys_futimesat
+262 common newfstatat sys_newfstatat
+263 common unlinkat sys_unlinkat
+264 common renameat sys_renameat
+265 common linkat sys_linkat
+266 common symlinkat sys_symlinkat
+267 common readlinkat sys_readlinkat
+268 common fchmodat sys_fchmodat
+269 common faccessat sys_faccessat
+270 common pselect6 sys_pselect6
+271 common ppoll sys_ppoll
+272 common unshare sys_unshare
+273 64 set_robust_list sys_set_robust_list
+274 64 get_robust_list sys_get_robust_list
+275 common splice sys_splice
+276 common tee sys_tee
+277 common sync_file_range sys_sync_file_range
+278 64 vmsplice sys_vmsplice
+279 64 move_pages sys_move_pages
+280 common utimensat sys_utimensat
+281 common epoll_pwait sys_epoll_pwait
+282 common signalfd sys_signalfd
+283 common timerfd_create sys_timerfd_create
+284 common eventfd sys_eventfd
+285 common fallocate sys_fallocate
+286 common timerfd_settime sys_timerfd_settime
+287 common timerfd_gettime sys_timerfd_gettime
+288 common accept4 sys_accept4
+289 common signalfd4 sys_signalfd4
+290 common eventfd2 sys_eventfd2
+291 common epoll_create1 sys_epoll_create1
+292 common dup3 sys_dup3
+293 common pipe2 sys_pipe2
+294 common inotify_init1 sys_inotify_init1
+295 64 preadv sys_preadv
+296 64 pwritev sys_pwritev
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+298 common perf_event_open sys_perf_event_open
+299 64 recvmmsg sys_recvmmsg
+300 common fanotify_init sys_fanotify_init
+301 common fanotify_mark sys_fanotify_mark
+302 common prlimit64 sys_prlimit64
+303 common name_to_handle_at sys_name_to_handle_at
+304 common open_by_handle_at sys_open_by_handle_at
+305 common clock_adjtime sys_clock_adjtime
+306 common syncfs sys_syncfs
+307 64 sendmmsg sys_sendmmsg
+308 common setns sys_setns
+309 common getcpu sys_getcpu
+310 64 process_vm_readv sys_process_vm_readv
+311 64 process_vm_writev sys_process_vm_writev
+312 common kcmp sys_kcmp
+313 common finit_module sys_finit_module
+314 common sched_setattr sys_sched_setattr
+315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
+317 common seccomp sys_seccomp
+318 common getrandom sys_getrandom
+319 common memfd_create sys_memfd_create
+320 common kexec_file_load sys_kexec_file_load
+321 common bpf sys_bpf
+322 64 execveat sys_execveat
+323 common userfaultfd sys_userfaultfd
+324 common membarrier sys_membarrier
+325 common mlock2 sys_mlock2
+326 common copy_file_range sys_copy_file_range
+327 64 preadv2 sys_preadv2
+328 64 pwritev2 sys_pwritev2
+329 common pkey_mprotect sys_pkey_mprotect
+330 common pkey_alloc sys_pkey_alloc
+331 common pkey_free sys_pkey_free
+332 common statx sys_statx
+333 common io_pgetevents sys_io_pgetevents
+334 common rseq sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
-424 common pidfd_send_signal __x64_sys_pidfd_send_signal
-425 common io_uring_setup __x64_sys_io_uring_setup
-426 common io_uring_enter __x64_sys_io_uring_enter
-427 common io_uring_register __x64_sys_io_uring_register
-428 common open_tree __x64_sys_open_tree
-429 common move_mount __x64_sys_move_mount
-430 common fsopen __x64_sys_fsopen
-431 common fsconfig __x64_sys_fsconfig
-432 common fsmount __x64_sys_fsmount
-433 common fspick __x64_sys_fspick
-434 common pidfd_open __x64_sys_pidfd_open
-435 common clone3 __x64_sys_clone3/ptregs
-437 common openat2 __x64_sys_openat2
-438 common pidfd_getfd __x64_sys_pidfd_getfd
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -366,39 +366,39 @@
# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
# is defined.
#
-512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
-513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl __x32_compat_sys_ioctl
-515 x32 readv __x32_compat_sys_readv
-516 x32 writev __x32_compat_sys_writev
-517 x32 recvfrom __x32_compat_sys_recvfrom
-518 x32 sendmsg __x32_compat_sys_sendmsg
-519 x32 recvmsg __x32_compat_sys_recvmsg
-520 x32 execve __x32_compat_sys_execve/ptregs
-521 x32 ptrace __x32_compat_sys_ptrace
-522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
-524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack __x32_compat_sys_sigaltstack
-526 x32 timer_create __x32_compat_sys_timer_create
-527 x32 mq_notify __x32_compat_sys_mq_notify
-528 x32 kexec_load __x32_compat_sys_kexec_load
-529 x32 waitid __x32_compat_sys_waitid
-530 x32 set_robust_list __x32_compat_sys_set_robust_list
-531 x32 get_robust_list __x32_compat_sys_get_robust_list
-532 x32 vmsplice __x32_compat_sys_vmsplice
-533 x32 move_pages __x32_compat_sys_move_pages
-534 x32 preadv __x32_compat_sys_preadv64
-535 x32 pwritev __x32_compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64
-538 x32 sendmmsg __x32_compat_sys_sendmmsg
-539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
-540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
-541 x32 setsockopt __x32_compat_sys_setsockopt
-542 x32 getsockopt __x32_compat_sys_getsockopt
-543 x32 io_setup __x32_compat_sys_io_setup
-544 x32 io_submit __x32_compat_sys_io_submit
-545 x32 execveat __x32_compat_sys_execveat/ptregs
-546 x32 preadv2 __x32_compat_sys_preadv64v2
-547 x32 pwritev2 __x32_compat_sys_pwritev64v2
+512 x32 rt_sigaction compat_sys_rt_sigaction
+513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
+514 x32 ioctl compat_sys_ioctl
+515 x32 readv compat_sys_readv
+516 x32 writev compat_sys_writev
+517 x32 recvfrom compat_sys_recvfrom
+518 x32 sendmsg compat_sys_sendmsg
+519 x32 recvmsg compat_sys_recvmsg
+520 x32 execve compat_sys_execve
+521 x32 ptrace compat_sys_ptrace
+522 x32 rt_sigpending compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack compat_sys_sigaltstack
+526 x32 timer_create compat_sys_timer_create
+527 x32 mq_notify compat_sys_mq_notify
+528 x32 kexec_load compat_sys_kexec_load
+529 x32 waitid compat_sys_waitid
+530 x32 set_robust_list compat_sys_set_robust_list
+531 x32 get_robust_list compat_sys_get_robust_list
+532 x32 vmsplice compat_sys_vmsplice
+533 x32 move_pages compat_sys_move_pages
+534 x32 preadv compat_sys_preadv64
+535 x32 pwritev compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg compat_sys_recvmmsg_time64
+538 x32 sendmmsg compat_sys_sendmmsg
+539 x32 process_vm_readv compat_sys_process_vm_readv
+540 x32 process_vm_writev compat_sys_process_vm_writev
+541 x32 setsockopt compat_sys_setsockopt
+542 x32 getsockopt compat_sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
+545 x32 execveat compat_sys_execveat
+546 x32 preadv2 compat_sys_preadv64v2
+547 x32 pwritev2 compat_sys_pwritev64v2
diff --git a/arch/x86/entry/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
index 12fbbcfe7ef3..cc1e63857427 100644
--- a/arch/x86/entry/syscalls/syscallhdr.sh
+++ b/arch/x86/entry/syscalls/syscallhdr.sh
@@ -15,14 +15,21 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
echo "#define ${fileguard} 1"
echo ""
+ max=0
while read nr abi name entry ; do
if [ -z "$offset" ]; then
echo "#define __NR_${prefix}${name} $nr"
else
echo "#define __NR_${prefix}${name} ($offset + $nr)"
fi
+
+ max=$nr
done
echo ""
+ echo "#ifdef __KERNEL__"
+ echo "#define __NR_${prefix}syscall_max $max"
+ echo "#endif"
+ echo ""
echo "#endif /* ${fileguard} */"
) > "$out"
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 1af2be39e7d9..929bde120d6b 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -9,15 +9,7 @@ syscall_macro() {
local nr="$2"
local entry="$3"
- # Entry can be either just a function name or "function/qualifier"
- real_entry="${entry%%/*}"
- if [ "$entry" = "$real_entry" ]; then
- qualifier=
- else
- qualifier=${entry#*/}
- fi
-
- echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+ echo "__SYSCALL_${abi}($nr, $entry)"
}
emit() {
@@ -25,27 +17,15 @@ emit() {
local nr="$2"
local entry="$3"
local compat="$4"
- local umlentry=""
if [ "$abi" != "I386" -a -n "$compat" ]; then
echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
exit 1
fi
- # For CONFIG_UML, we need to strip the __x64_sys prefix
- if [ "$abi" = "64" -a "${entry}" != "${entry#__x64_sys}" ]; then
- umlentry="sys${entry#__x64_sys}"
- fi
-
if [ -z "$compat" ]; then
- if [ -n "$entry" -a -z "$umlentry" ]; then
- syscall_macro "$abi" "$nr" "$entry"
- elif [ -n "$umlentry" ]; then # implies -n "$entry"
- echo "#ifdef CONFIG_X86"
+ if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
- echo "#else /* CONFIG_UML */"
- syscall_macro "$abi" "$nr" "$umlentry"
- echo "#endif"
fi
else
echo "#ifdef CONFIG_X86_32"
@@ -61,24 +41,6 @@ emit() {
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
- if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
- emit 64 "$nr" "$entry" "$compat"
- if [ "$abi" = "COMMON" ]; then
- # COMMON means that this syscall exists in the same form for
- # 64-bit and X32.
- echo "#ifdef CONFIG_X86_X32_ABI"
- emit X32 "$nr" "$entry" "$compat"
- echo "#endif"
- fi
- elif [ "$abi" = "X32" ]; then
- echo "#ifdef CONFIG_X86_X32_ABI"
- emit X32 "$nr" "$entry" "$compat"
- echo "#endif"
- elif [ "$abi" = "I386" ]; then
- emit "$abi" "$nr" "$entry" "$compat"
- else
- echo "Unknown abi $abi" >&2
- exit 1
- fi
+ emit "$abi" "$nr" "$entry" "$compat"
done
) > "$out"
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index e010d4ae11f1..3a07ce3ec70b 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -35,9 +35,9 @@ SYM_CODE_END(\name)
#endif
#ifdef CONFIG_PREEMPTION
- THUNK ___preempt_schedule, preempt_schedule
- THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
- EXPORT_SYMBOL(___preempt_schedule)
- EXPORT_SYMBOL(___preempt_schedule_notrace)
+ THUNK preempt_schedule_thunk, preempt_schedule
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+ EXPORT_SYMBOL(preempt_schedule_thunk)
+ EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
#endif
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index c5c3b6e86e62..dbe4493b534e 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -47,10 +47,10 @@ SYM_FUNC_END(\name)
#endif
#ifdef CONFIG_PREEMPTION
- THUNK ___preempt_schedule, preempt_schedule
- THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
- EXPORT_SYMBOL(___preempt_schedule)
- EXPORT_SYMBOL(___preempt_schedule_notrace)
+ THUNK preempt_schedule_thunk, preempt_schedule
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+ EXPORT_SYMBOL(preempt_schedule_thunk)
+ EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
#endif
#if defined(CONFIG_TRACE_IRQFLAGS) \
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index ea7e0155c604..4d152933547d 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -57,6 +57,13 @@ SECTIONS
*(.gnu.linkonce.b.*)
} :text
+ /*
+ * Discard .note.gnu.property sections which are unused and have
+ * different alignment requirement from vDSO note sections.
+ */
+ /DISCARD/ : {
+ *(.note.gnu.property)
+ }
.note : { *(.note.*) } :text :note
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 9242b28418d5..1e82bd43286c 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -13,6 +13,7 @@
*/
#undef CONFIG_64BIT
#undef CONFIG_X86_64
+#undef CONFIG_COMPAT
#undef CONFIG_PGTABLE_LEVELS
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index c1b8496b5606..43428cc514c8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -38,6 +38,8 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page)
}
#undef EMIT_VVAR
+unsigned int vclocks_used __read_mostly;
+
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif
@@ -219,7 +221,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_get_pvti_cpu0_va();
- if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+ if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
return vmf_insert_pfn_prot(vma, vmf->address,
__pa(pvti) >> PAGE_SHIFT,
pgprot_decrypted(vma->vm_page_prot));
@@ -227,7 +229,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
} else if (sym_offset == image->sym_hvclock_page) {
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
- if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
+ if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
return vmf_insert_pfn(vma, vmf->address,
virt_to_phys(tsc_pg) >> PAGE_SHIFT);
} else if (sym_offset == image->sym_timens_page) {
@@ -445,6 +447,8 @@ __setup("vdso=", vdso_setup);
static int __init init_vdso(void)
{
+ BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
+
init_vdso_image(&vdso_image_64);
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 1f22b6bbda68..39eb276d0277 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,6 +250,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index abef51320e3a..43b09e9c93a2 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -259,7 +259,7 @@ static int power_cpu_init(unsigned int cpu)
}
static const struct x86_cpu_id cpu_match[] = {
- { .vendor = X86_VENDOR_AMD, .family = 0x15 },
+ X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
{},
};
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index a6ea07f2aa84..76400c052b0e 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -180,6 +180,31 @@ static void amd_uncore_del(struct perf_event *event, int flags)
hwc->idx = -1;
}
+/*
+ * Convert logical CPU number to L3 PMC Config ThreadMask format
+ */
+static u64 l3_thread_slice_mask(int cpu)
+{
+ u64 thread_mask, core = topology_core_id(cpu);
+ unsigned int shift, thread = 0;
+
+ if (topology_smt_supported() && !topology_is_primary_thread(cpu))
+ thread = 1;
+
+ if (boot_cpu_data.x86 <= 0x18) {
+ shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
+ thread_mask = BIT_ULL(shift);
+
+ return AMD64_L3_SLICE_MASK | thread_mask;
+ }
+
+ core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
+ shift = AMD64_L3_THREAD_SHIFT + thread;
+ thread_mask = BIT_ULL(shift);
+
+ return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
+}
+
static int amd_uncore_event_init(struct perf_event *event)
{
struct amd_uncore *uncore;
@@ -190,15 +215,12 @@ static int amd_uncore_event_init(struct perf_event *event)
/*
* NB and Last level cache counters (MSRs) are shared across all cores
- * that share the same NB / Last level cache. Interrupts can be directed
- * to a single target core, however, event counts generated by processes
- * running on other cores cannot be masked out. So we do not support
- * sampling and per-thread events.
+ * that share the same NB / Last level cache. On family 16h and below,
+ * Interrupts can be directed to a single target core, however, event
+ * counts generated by processes running on other cores cannot be masked
+ * out. So we do not support sampling and per-thread events via
+ * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- /* and we do not enable counter overflow interrupts */
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
@@ -206,18 +228,11 @@ static int amd_uncore_event_init(struct perf_event *event)
return -EINVAL;
/*
- * SliceMask and ThreadMask need to be set for certain L3 events in
- * Family 17h. For other events, the two fields do not affect the count.
+ * SliceMask and ThreadMask need to be set for certain L3 events.
+ * For other events, the two fields do not affect the count.
*/
- if (l3_mask && is_llc_event(event)) {
- int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4);
-
- if (smp_num_siblings > 1)
- thread += cpu_data(event->cpu).apicid & 1;
-
- hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) &
- AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK;
- }
+ if (l3_mask && is_llc_event(event))
+ hwc->config |= l3_thread_slice_mask(event->cpu);
uncore = event_to_amd_uncore(event);
if (!uncore)
@@ -306,7 +321,7 @@ static struct pmu amd_nb_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
};
static struct pmu amd_llc_pmu = {
@@ -317,7 +332,7 @@ static struct pmu amd_llc_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
@@ -523,9 +538,9 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
- if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
+ if (boot_cpu_data.x86 >= 0x17) {
/*
- * For F17h or F18h, the Northbridge counters are
+ * For F17h and above, the Northbridge counters are
* repurposed as Data Fabric counters. Also, L3
* counters are supported too. The PMUs are exported
* based on family as either L2 or L3 and NB or DF.
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 3bb738f5a472..a619763e96e1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2490,7 +2490,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
/* 32-bit process in 64-bit kernel. */
unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
- const void __user *fp;
+ const struct stack_frame_ia32 __user *fp;
if (!test_thread_flag(TIF_IA32))
return 0;
@@ -2501,18 +2501,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
while (entry->nr < entry->max_stack) {
- unsigned long bytes;
- frame.next_frame = 0;
- frame.return_address = 0;
-
if (!valid_user_frame(fp, sizeof(frame)))
break;
- bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
- if (bytes != 0)
+ if (__get_user(frame.next_frame, &fp->next_frame))
break;
- bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
- if (bytes != 0)
+ if (__get_user(frame.return_address, &fp->return_address))
break;
perf_callchain_store(entry, cs_base + frame.return_address);
@@ -2533,7 +2527,7 @@ void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct stack_frame frame;
- const unsigned long __user *fp;
+ const struct stack_frame __user *fp;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
@@ -2546,7 +2540,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
return;
- fp = (unsigned long __user *)regs->bp;
+ fp = (void __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
@@ -2558,19 +2552,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
pagefault_disable();
while (entry->nr < entry->max_stack) {
- unsigned long bytes;
-
- frame.next_frame = NULL;
- frame.return_address = 0;
-
if (!valid_user_frame(fp, sizeof(frame)))
break;
- bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
- if (bytes != 0)
+ if (__get_user(frame.next_frame, &fp->next_frame))
break;
- bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
- if (bytes != 0)
+ if (__get_user(frame.return_address, &fp->return_address))
break;
perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3be51aa06e67..332954cccece 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1945,6 +1945,14 @@ static __initconst const u64 knl_hw_cache_extra_regs
* intel_bts events don't coexist with intel PMU's BTS events because of
* x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
* disabled around intel PMU's event batching etc, only inside the PMI handler.
+ *
+ * Avoid PEBS_ENABLE MSR access in PMIs.
+ * The GLOBAL_CTRL has been disabled. All the counters do not count anymore.
+ * It doesn't matter if the PEBS is enabled or not.
+ * Usually, the PEBS status are not changed in PMIs. It's unnecessary to
+ * access PEBS_ENABLE MSR in disable_all()/enable_all().
+ * However, there are some cases which may change PEBS status, e.g. PMI
+ * throttle. The PEBS_ENABLE should be updated where the status changes.
*/
static void __intel_pmu_disable_all(void)
{
@@ -1954,13 +1962,12 @@ static void __intel_pmu_disable_all(void)
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
-
- intel_pmu_pebs_disable_all();
}
static void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all();
+ intel_pmu_pebs_disable_all();
intel_pmu_lbr_disable_all();
}
@@ -1968,7 +1975,6 @@ static void __intel_pmu_enable_all(int added, bool pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all(pmi);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
@@ -1986,6 +1992,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
static void intel_pmu_enable_all(int added)
{
+ intel_pmu_pebs_enable_all();
__intel_pmu_enable_all(added, false);
}
@@ -2374,9 +2381,21 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* PEBS overflow sets bit 62 in the global status register
*/
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ u64 pebs_enabled = cpuc->pebs_enabled;
+
handled++;
x86_pmu.drain_pebs(regs);
status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+
+ /*
+ * PMI throttle may be triggered, which stops the PEBS event.
+ * Although cpuc->pebs_enabled is updated accordingly, the
+ * MSR_IA32_PEBS_ENABLE is not updated. Because the
+ * cpuc->enabled has been forced to 0 in PMI.
+ * Update the MSR if pebs_enabled is changed.
+ */
+ if (pebs_enabled != cpuc->pebs_enabled)
+ wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}
/*
@@ -4765,6 +4784,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_TREMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e1daf4151e11..e4aa20c0426f 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,17 +40,18 @@
* Model specific counters:
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
- * Available model: SLM,AMT,GLM,CNL
+ * Available model: SLM,AMT,GLM,CNL,TNT
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
- * CNL,KBL,CML
+ * CNL,KBL,CML,TNT
* Scope: Core
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
+ * TNT
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
@@ -60,17 +61,18 @@
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
- * KBL,CML,ICL,TGL
+ * KBL,CML,ICL,TGL,TNT
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
- * GLM,CNL,KBL,CML,ICL,TGL
+ * GLM,CNL,KBL,CML,ICL,TGL,TNT
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
- * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
+ * TNT
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
@@ -87,7 +89,8 @@
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
+ * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
+ * TNT
* Scope: Package (physical package)
*
*/
@@ -591,62 +594,60 @@ static const struct cstate_model glm_cstates __initconst = {
};
-#define X86_CSTATES_MODEL(model, states) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
-
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
-
- X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 4b94ae4ae369..dc43cc124e09 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1714,6 +1714,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
old = ((s64)(prev_raw_count << shift) >> shift);
local64_add(new - old + count * period, &event->count);
+ local64_set(&hwc->period_left, -new);
+
perf_event_update_userpage(event);
return 0;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 534c76606049..65113b16804a 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -585,6 +585,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
+ cpuc->lbr_stack.hw_idx = tos;
}
/*
@@ -680,6 +681,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
out++;
}
cpuc->lbr_stack.nr = out;
+ cpuc->lbr_stack.hw_idx = tos;
}
void intel_pmu_lbr_read(void)
@@ -1120,6 +1122,13 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
int i;
cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
+
+ /* Cannot get TOS for large PEBS */
+ if (cpuc->n_pebs == cpuc->n_large_pebs)
+ cpuc->lbr_stack.hw_idx = -1ULL;
+ else
+ cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
+
for (i = 0; i < x86_pmu.lbr_nr; i++) {
u64 info = lbr->lbr[i].info;
struct perf_branch_entry *e = &cpuc->lbr_entries[i];
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 09913121e726..a5dbd25852cb 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -668,9 +668,6 @@ static int __init init_rapl_pmus(void)
return 0;
}
-#define X86_RAPL_MODEL_MATCH(model, init) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
-
static struct rapl_model model_snb = {
.events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
@@ -716,36 +713,35 @@ static struct rapl_model model_skl = {
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_L, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_G, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_L, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_D, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
{},
};
-
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
static int __init rapl_pmu_init(void)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 86467f85c383..1ba72c563313 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1392,10 +1392,6 @@ err:
return ret;
}
-
-#define X86_UNCORE_MODEL_MATCH(model, init) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
-
struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
@@ -1470,6 +1466,16 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
.pci_init = skl_uncore_pci_init,
};
+static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
+ .cpu_init = icl_uncore_cpu_init,
+ .mmio_init = tgl_uncore_mmio_init,
+};
+
+static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
+ .cpu_init = icl_uncore_cpu_init,
+ .mmio_init = tgl_l_uncore_mmio_init,
+};
+
static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
.cpu_init = snr_uncore_cpu_init,
.pci_init = snr_uncore_pci_init,
@@ -1477,38 +1483,39 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL, hsw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_L, hsw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_G, hsw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL, bdw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, bdw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, bdx_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE, skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE, skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
};
-
MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
static int __init intel_uncore_init(void)
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index bbfdaa720b45..b30429f8a53a 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -154,6 +154,7 @@ struct freerunning_counters {
unsigned int box_offset;
unsigned int num_counters;
unsigned int bits;
+ unsigned *box_offsets;
};
struct pci2phy_map {
@@ -310,7 +311,9 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
return pmu->type->freerunning[type].counter_base +
pmu->type->freerunning[type].counter_offset * idx +
- pmu->type->freerunning[type].box_offset * pmu->pmu_idx;
+ (pmu->type->freerunning[type].box_offsets ?
+ pmu->type->freerunning[type].box_offsets[pmu->pmu_idx] :
+ pmu->type->freerunning[type].box_offset * pmu->pmu_idx);
}
static inline
@@ -527,6 +530,8 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
+void tgl_uncore_mmio_init(void);
+void tgl_l_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index c37cb12d0ef6..3de1065eefc4 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -44,6 +44,11 @@
#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35
#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
+#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02
+#define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04
+#define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12
+#define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14
+#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36
/* SNB event control */
@@ -1002,3 +1007,157 @@ void nhm_uncore_cpu_init(void)
}
/* end of Nehalem uncore support */
+
+/* Tiger Lake MMIO uncore support */
+
+static const struct pci_device_id tgl_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+enum perf_tgl_uncore_imc_freerunning_types {
+ TGL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ TGL_MMIO_UNCORE_IMC_DATA_READ,
+ TGL_MMIO_UNCORE_IMC_DATA_WRITE,
+ TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = {
+ [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 },
+};
+
+static struct freerunning_counters tgl_uncore_imc_freerunning[] = {
+ [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 },
+};
+
+static struct uncore_event_desc tgl_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"),
+ INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"),
+
+ { /* end: all zeroes */ }
+};
+
+static struct pci_dev *tgl_uncore_get_mc_dev(void)
+{
+ const struct pci_device_id *ids = tgl_uncore_pci_ids;
+ struct pci_dev *mc_dev = NULL;
+
+ while (ids && ids->vendor) {
+ mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL);
+ if (mc_dev)
+ return mc_dev;
+ ids++;
+ }
+
+ return mc_dev;
+}
+
+#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
+
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = tgl_uncore_get_mc_dev();
+ struct intel_uncore_pmu *pmu = box->pmu;
+ resource_size_t addr;
+ u32 mch_bar;
+
+ if (!pdev) {
+ pr_warn("perf uncore: Cannot find matched IMC device.\n");
+ return;
+ }
+
+ pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar);
+ /* MCHBAR is disabled */
+ if (!(mch_bar & BIT(0))) {
+ pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n");
+ return;
+ }
+ mch_bar &= ~BIT(0);
+ addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx);
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar);
+ addr |= ((resource_size_t)mch_bar << 32);
+#endif
+
+ box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+}
+
+static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
+ .init_box = tgl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct attribute *tgl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL
+};
+
+static const struct attribute_group tgl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = tgl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type tgl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = tgl_uncore_imc_freerunning,
+ .ops = &tgl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *tgl_mmio_uncores[] = {
+ &tgl_uncore_imc_free_running,
+ NULL
+};
+
+void tgl_l_uncore_mmio_init(void)
+{
+ tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning;
+ uncore_mmio_uncores = tgl_mmio_uncores;
+}
+
+void tgl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = tgl_mmio_uncores;
+}
+
+/* end of Tiger Lake MMIO uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index ad20220af303..01023f0d935b 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -4380,10 +4380,10 @@ static struct pci_dev *snr_uncore_get_mc_dev(int id)
return mc_dev;
}
-static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
+static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
+ unsigned int box_ctl, int mem_offset)
{
struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
- unsigned int box_ctl = uncore_mmio_box_ctl(box);
resource_size_t addr;
u32 pci_dword;
@@ -4393,7 +4393,7 @@ static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword);
addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
- pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword);
+ pci_read_config_dword(pdev, mem_offset, &pci_dword);
addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12;
addr += box_ctl;
@@ -4405,6 +4405,12 @@ static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
}
+static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
+{
+ __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box),
+ SNR_IMC_MMIO_MEM0_OFFSET);
+}
+
static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box)
{
u32 config;
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 6f86650b3f77..a949f6f55991 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_D:
-
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_FAM6_ATOM_TREMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index d13b352b2aa7..8e4d0391ff6c 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,7 @@
# Makefile for the ia32 kernel emulation subsystem.
#
-obj-$(CONFIG_IA32_EMULATION) := sys_ia32.o ia32_signal.o
+obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a3aefe9b9401..f9d8804144d0 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -36,70 +36,56 @@
#include <asm/sighandling.h>
#include <asm/smap.h>
+static inline void reload_segments(struct sigcontext_32 *sc)
+{
+ unsigned int cur;
+
+ savesegment(gs, cur);
+ if ((sc->gs | 0x03) != cur)
+ load_gs_index(sc->gs | 0x03);
+ savesegment(fs, cur);
+ if ((sc->fs | 0x03) != cur)
+ loadsegment(fs, sc->fs | 0x03);
+ savesegment(ds, cur);
+ if ((sc->ds | 0x03) != cur)
+ loadsegment(ds, sc->ds | 0x03);
+ savesegment(es, cur);
+ if ((sc->es | 0x03) != cur)
+ loadsegment(es, sc->es | 0x03);
+}
+
/*
* Do a signal return; undo the signal stack.
*/
-#define loadsegment_gs(v) load_gs_index(v)
-#define loadsegment_fs(v) loadsegment(fs, v)
-#define loadsegment_ds(v) loadsegment(ds, v)
-#define loadsegment_es(v) loadsegment(es, v)
-
-#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; })
-#define set_user_seg(seg, v) loadsegment_##seg(v)
-
-#define COPY(x) { \
- get_user_ex(regs->x, &sc->x); \
-}
-
-#define GET_SEG(seg) ({ \
- unsigned short tmp; \
- get_user_ex(tmp, &sc->seg); \
- tmp; \
-})
-
-#define COPY_SEG_CPL3(seg) do { \
- regs->seg = GET_SEG(seg) | 3; \
-} while (0)
-
-#define RELOAD_SEG(seg) { \
- unsigned int pre = (seg) | 3; \
- unsigned int cur = get_user_seg(seg); \
- if (pre != cur) \
- set_user_seg(seg, pre); \
-}
-
static int ia32_restore_sigcontext(struct pt_regs *regs,
- struct sigcontext_32 __user *sc)
+ struct sigcontext_32 __user *usc)
{
- unsigned int tmpflags, err = 0;
- u16 gs, fs, es, ds;
- void __user *buf;
- u32 tmp;
+ struct sigcontext_32 sc;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
- get_user_try {
- gs = GET_SEG(gs);
- fs = GET_SEG(fs);
- ds = GET_SEG(ds);
- es = GET_SEG(es);
-
- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip); COPY(ax);
- /* Don't touch extended registers */
-
- COPY_SEG_CPL3(cs);
- COPY_SEG_CPL3(ss);
-
- get_user_ex(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- /* disable syscall checks */
- regs->orig_ax = -1;
+ if (unlikely(copy_from_user(&sc, usc, sizeof(sc))))
+ return -EFAULT;
- get_user_ex(tmp, &sc->fpstate);
- buf = compat_ptr(tmp);
- } get_user_catch(err);
+ /* Get only the ia32 registers. */
+ regs->bx = sc.bx;
+ regs->cx = sc.cx;
+ regs->dx = sc.dx;
+ regs->si = sc.si;
+ regs->di = sc.di;
+ regs->bp = sc.bp;
+ regs->ax = sc.ax;
+ regs->sp = sc.sp;
+ regs->ip = sc.ip;
+
+ /* Get CS/SS and force CPL3 */
+ regs->cs = sc.cs | 0x03;
+ regs->ss = sc.ss | 0x03;
+
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS);
+ /* disable syscall checks */
+ regs->orig_ax = -1;
/*
* Reload fs and gs if they have changed in the signal
@@ -107,14 +93,8 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
* the handler, but does not clobber them at least in the
* normal case.
*/
- RELOAD_SEG(gs);
- RELOAD_SEG(fs);
- RELOAD_SEG(ds);
- RELOAD_SEG(es);
-
- err |= fpu__restore_sig(buf, 1);
-
- return err;
+ reload_segments(&sc);
+ return fpu__restore_sig(compat_ptr(sc.fpstate), 1);
}
COMPAT_SYSCALL_DEFINE0(sigreturn)
@@ -126,10 +106,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
- || (_COMPAT_NSIG_WORDS > 1
- && __copy_from_user((((char *) &set.sig) + 4),
- &frame->extramask,
- sizeof(frame->extramask))))
+ || __get_user(((__u32 *)&set)[1], &frame->extramask[0]))
goto badframe;
set_current_blocked(&set);
@@ -153,7 +130,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask))
goto badframe;
set_current_blocked(&set);
@@ -175,44 +152,51 @@ badframe:
* Set up a signal frame.
*/
-static int ia32_setup_sigcontext(struct sigcontext_32 __user *sc,
- void __user *fpstate,
- struct pt_regs *regs, unsigned int mask)
+#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; })
+
+static __always_inline int
+__unsafe_setup_sigcontext32(struct sigcontext_32 __user *sc,
+ void __user *fpstate,
+ struct pt_regs *regs, unsigned int mask)
{
- int err = 0;
-
- put_user_try {
- put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs);
- put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs);
- put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds);
- put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es);
-
- put_user_ex(regs->di, &sc->di);
- put_user_ex(regs->si, &sc->si);
- put_user_ex(regs->bp, &sc->bp);
- put_user_ex(regs->sp, &sc->sp);
- put_user_ex(regs->bx, &sc->bx);
- put_user_ex(regs->dx, &sc->dx);
- put_user_ex(regs->cx, &sc->cx);
- put_user_ex(regs->ax, &sc->ax);
- put_user_ex(current->thread.trap_nr, &sc->trapno);
- put_user_ex(current->thread.error_code, &sc->err);
- put_user_ex(regs->ip, &sc->ip);
- put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
- put_user_ex(regs->flags, &sc->flags);
- put_user_ex(regs->sp, &sc->sp_at_signal);
- put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
-
- put_user_ex(ptr_to_compat(fpstate), &sc->fpstate);
-
- /* non-iBCS2 extensions.. */
- put_user_ex(mask, &sc->oldmask);
- put_user_ex(current->thread.cr2, &sc->cr2);
- } put_user_catch(err);
-
- return err;
+ unsafe_put_user(get_user_seg(gs), (unsigned int __user *)&sc->gs, Efault);
+ unsafe_put_user(get_user_seg(fs), (unsigned int __user *)&sc->fs, Efault);
+ unsafe_put_user(get_user_seg(ds), (unsigned int __user *)&sc->ds, Efault);
+ unsafe_put_user(get_user_seg(es), (unsigned int __user *)&sc->es, Efault);
+
+ unsafe_put_user(regs->di, &sc->di, Efault);
+ unsafe_put_user(regs->si, &sc->si, Efault);
+ unsafe_put_user(regs->bp, &sc->bp, Efault);
+ unsafe_put_user(regs->sp, &sc->sp, Efault);
+ unsafe_put_user(regs->bx, &sc->bx, Efault);
+ unsafe_put_user(regs->dx, &sc->dx, Efault);
+ unsafe_put_user(regs->cx, &sc->cx, Efault);
+ unsafe_put_user(regs->ax, &sc->ax, Efault);
+ unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault);
+ unsafe_put_user(current->thread.error_code, &sc->err, Efault);
+ unsafe_put_user(regs->ip, &sc->ip, Efault);
+ unsafe_put_user(regs->cs, (unsigned int __user *)&sc->cs, Efault);
+ unsafe_put_user(regs->flags, &sc->flags, Efault);
+ unsafe_put_user(regs->sp, &sc->sp_at_signal, Efault);
+ unsafe_put_user(regs->ss, (unsigned int __user *)&sc->ss, Efault);
+
+ unsafe_put_user(ptr_to_compat(fpstate), &sc->fpstate, Efault);
+
+ /* non-iBCS2 extensions.. */
+ unsafe_put_user(mask, &sc->oldmask, Efault);
+ unsafe_put_user(current->thread.cr2, &sc->cr2, Efault);
+ return 0;
+
+Efault:
+ return -EFAULT;
}
+#define unsafe_put_sigcontext32(sc, fp, regs, set, label) \
+do { \
+ if (__unsafe_setup_sigcontext32(sc, fp, regs, set->sig[0])) \
+ goto label; \
+} while(0)
+
/*
* Determine which stack to use..
*/
@@ -252,8 +236,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
{
struct sigframe_ia32 __user *frame;
void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
+ void __user *fp = NULL;
/* copy_to_user optimizes that into a single 8 byte store */
static const struct {
@@ -266,22 +249,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
0x80cd, /* int $0x80 */
};
- frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
-
- if (!access_ok(frame, sizeof(*frame)))
- return -EFAULT;
-
- if (__put_user(sig, &frame->sig))
- return -EFAULT;
-
- if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
- return -EFAULT;
-
- if (_COMPAT_NSIG_WORDS > 1) {
- if (__copy_to_user(frame->extramask, &set->sig[1],
- sizeof(frame->extramask)))
- return -EFAULT;
- }
+ frame = get_sigframe(ksig, regs, sizeof(*frame), &fp);
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
restorer = ksig->ka.sa.sa_restorer;
@@ -294,19 +262,20 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
restorer = &frame->retcode;
}
- put_user_try {
- put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
-
- /*
- * These are actually not used anymore, but left because some
- * gdb versions depend on them as a marker.
- */
- put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
- } put_user_catch(err);
-
- if (err)
+ if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
+ unsafe_put_user(sig, &frame->sig, Efault);
+ unsafe_put_sigcontext32(&frame->sc, fp, regs, set, Efault);
+ unsafe_put_user(set->sig[1], &frame->extramask[0], Efault);
+ unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault);
+ /*
+ * These are actually not used anymore, but left because some
+ * gdb versions depend on them as a marker.
+ */
+ unsafe_put_user(*((u64 *)&code), (u64 __user *)frame->retcode, Efault);
+ user_access_end();
+
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
@@ -323,6 +292,9 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
regs->ss = __USER32_DS;
return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
}
int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
@@ -330,10 +302,9 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
{
struct rt_sigframe_ia32 __user *frame;
void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
+ void __user *fp = NULL;
- /* __copy_to_user optimizes that into a single 8 byte store */
+ /* unsafe_put_user optimizes that into a single 8 byte store */
static const struct {
u8 movl;
u32 val;
@@ -346,44 +317,40 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
0,
};
- frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(ksig, regs, sizeof(*frame), &fp);
- if (!access_ok(frame, sizeof(*frame)))
+ if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
- put_user_try {
- put_user_ex(sig, &frame->sig);
- put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo);
- put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
+ unsafe_put_user(sig, &frame->sig, Efault);
+ unsafe_put_user(ptr_to_compat(&frame->info), &frame->pinfo, Efault);
+ unsafe_put_user(ptr_to_compat(&frame->uc), &frame->puc, Efault);
- /* Create the ucontext. */
- if (static_cpu_has(X86_FEATURE_XSAVE))
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
- put_user_ex(0, &frame->uc.uc_link);
- compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
+ /* Create the ucontext. */
+ if (static_cpu_has(X86_FEATURE_XSAVE))
+ unsafe_put_user(UC_FP_XSTATE, &frame->uc.uc_flags, Efault);
+ else
+ unsafe_put_user(0, &frame->uc.uc_flags, Efault);
+ unsafe_put_user(0, &frame->uc.uc_link, Efault);
+ unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
- if (ksig->ka.sa.sa_flags & SA_RESTORER)
- restorer = ksig->ka.sa.sa_restorer;
- else
- restorer = current->mm->context.vdso +
- vdso_image_32.sym___kernel_rt_sigreturn;
- put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
-
- /*
- * Not actually used anymore, but left because some gdb
- * versions need it.
- */
- put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
- } put_user_catch(err);
-
- err |= __copy_siginfo_to_user32(&frame->info, &ksig->info, false);
- err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- if (err)
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ restorer = ksig->ka.sa.sa_restorer;
+ else
+ restorer = current->mm->context.vdso +
+ vdso_image_32.sym___kernel_rt_sigreturn;
+ unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault);
+
+ /*
+ * Not actually used anymore, but left because some gdb
+ * versions need it.
+ */
+ unsafe_put_user(*((u64 *)&code), (u64 __user *)frame->retcode, Efault);
+ unsafe_put_sigcontext32(&frame->uc.uc_mcontext, fp, regs, set, Efault);
+ unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault);
+ user_access_end();
+
+ if (__copy_siginfo_to_user32(&frame->info, &ksig->info, false))
return -EFAULT;
/* Set up registers for signal handler */
@@ -402,4 +369,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
regs->ss = __USER32_DS;
return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
}
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 1ae4e5791afa..c7df20e78b09 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -12,7 +12,6 @@ struct amd_nb_bus_dev_range {
u8 dev_limit;
};
-extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
extern bool early_is_amd_nb(u32 value);
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index cd339b88d5d4..0f63585edf5f 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -138,9 +138,6 @@
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-# define _ASM_EXTABLE_EX(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
_ASM_ALIGN ; \
@@ -166,9 +163,6 @@
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-# define _ASM_EXTABLE_EX(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index dc4cfc888d6d..dc9dc7b3911a 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -4,14 +4,18 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
-#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
-#define VCLOCK_MAX 3
+#include <asm/vdso/clocksource.h>
-struct arch_clocksource_data {
- int vclock_mode;
-};
+extern unsigned int vclocks_used;
+
+static inline bool vclock_was_used(int vclock)
+{
+ return READ_ONCE(vclocks_used) & (1U << vclock);
+}
+
+static inline void vclocks_set_used(unsigned int which)
+{
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << which));
+}
#endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index adc6cc86b062..ff6f3ca649b3 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -40,4 +40,16 @@ int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
+#ifdef CONFIG_CPU_SUP_INTEL
+extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
+extern void switch_to_sld(unsigned long tifn);
+extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
+#else
+static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
+static inline void switch_to_sld(unsigned long tifn) {}
+static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+{
+ return false;
+}
+#endif
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index 31c379c1da41..cf3d621c6892 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -5,9 +5,139 @@
/*
* Declare drivers belonging to specific x86 CPUs
* Similar in spirit to pci_device_id and related PCI functions
+ *
+ * The wildcard initializers are in mod_devicetable.h because
+ * file2alias needs them. Sigh.
*/
-
#include <linux/mod_devicetable.h>
+/* Get the INTEL_FAM* model defines */
+#include <asm/intel-family.h>
+/* And the X86_VENDOR_* ones */
+#include <asm/processor.h>
+
+/* Centaur FAM6 models */
+#define X86_CENTAUR_FAM6_C7_A 0xa
+#define X86_CENTAUR_FAM6_C7_D 0xd
+#define X86_CENTAUR_FAM6_NANO 0xf
+
+/**
+ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching
+ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@_vendor
+ * @_family: The family number or X86_FAMILY_ANY
+ * @_model: The model number, model constant or X86_MODEL_ANY
+ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
+ * @_data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * Use only if you need all selectors. Otherwise use one of the shorter
+ * macros of the X86_MATCH_* family. If there is no matching shorthand
+ * macro, consider to add one. If you really need to wrap one of the macros
+ * into another macro at the usage site for good reasons, then please
+ * start this local macro with X86_MATCH to allow easy grepping.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \
+ _feature, _data) { \
+ .vendor = X86_VENDOR_##_vendor, \
+ .family = _family, \
+ .model = _model, \
+ .feature = _feature, \
+ .driver_data = (unsigned long) _data \
+}
+
+/**
+ * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \
+ X86_MODEL_ANY, feature, data)
+
+/**
+ * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
+ X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data)
+
+/**
+ * X86_MATCH_FEATURE - Macro for matching a CPU feature
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_FEATURE(feature, data) \
+ X86_MATCH_VENDOR_FEATURE(ANY, feature, data)
+
+/**
+ * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @model: The model number, model constant or X86_MODEL_ANY
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \
+ X86_FEATURE_ANY, data)
+
+/**
+ * X86_MATCH_VENDOR_FAM - Match vendor and family
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
+ * set of wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
+
+/**
+ * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
+ * @model: The model name without the INTEL_FAM6_ prefix or ANY
+ * The model name is expanded to INTEL_FAM6_@model internally
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * The vendor is set to INTEL, the family to 6 and all other missing
+ * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
+ *
+ * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
+ */
+#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
+ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
/*
* Match specific microcode revisions.
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f3327cb56edf..db189945e9b0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -217,7 +217,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -285,6 +285,7 @@
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -299,6 +300,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
@@ -367,6 +369,7 @@
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index ae391f609840..f71a0cce9373 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -42,8 +42,8 @@
* Emit CFI data in .debug_frame sections, not .eh_frame sections.
* The latter we currently just discard since we don't do DWARF
* unwinding at runtime. So only the offline DWARF information is
- * useful to anyone. Note we should not use this directive if
- * vmlinux.lds.S gets changed so it doesn't discard .eh_frame.
+ * useful to anyone. Note we should not use this directive if we
+ * ever decide to enable DWARF unwinding at runtime.
*/
.cfi_sections .debug_frame
#else
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 86169a24b0d8..cdcf48d52a12 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -10,6 +10,8 @@
#include <asm/mmu_context.h>
#include <linux/build_bug.h>
+extern unsigned long efi_fw_vendor, efi_config_table;
+
/*
* We map the EFI regions needed for runtime services non-contiguously,
* with preserved alignment on virtual addresses starting from -4G down
@@ -34,8 +36,6 @@ static inline bool efi_have_uv1_memmap(void)
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
-#define MAX_CMDLINE_ADDRESS UINT_MAX
-
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
/*
@@ -180,7 +180,6 @@ extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd);
struct efi_setup_data {
u64 fw_vendor;
- u64 runtime;
u64 tables;
u64 smbios;
u64 reserved[8];
@@ -219,7 +218,8 @@ extern void efi_thunk_runtime_setup(void);
efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
u32 descriptor_version,
- efi_memory_desc_t *virtual_map);
+ efi_memory_desc_t *virtual_map,
+ unsigned long systab_phys);
/* arch specific definitions used by the stub code */
@@ -270,6 +270,11 @@ static inline void *efi64_zero_upper(void *p)
return p;
}
+static inline u32 efi64_convert_status(efi_status_t status)
+{
+ return (u32)(status | (u64)status >> 32);
+}
+
#define __efi64_argmap_free_pages(addr, size) \
((addr), 0, (size))
@@ -285,11 +290,21 @@ static inline void *efi64_zero_upper(void *p)
#define __efi64_argmap_locate_protocol(protocol, reg, interface) \
((protocol), (reg), efi64_zero_upper(interface))
+#define __efi64_argmap_locate_device_path(protocol, path, handle) \
+ ((protocol), (path), efi64_zero_upper(handle))
+
+#define __efi64_argmap_exit(handle, status, size, data) \
+ ((handle), efi64_convert_status(status), (size), (data))
+
/* PCI I/O */
#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \
((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \
efi64_zero_upper(dev), efi64_zero_upper(func))
+/* LoadFile */
+#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \
+ ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf))
+
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 13c83fe97988..f9c00110a69a 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -12,76 +12,103 @@
#include <asm/processor.h>
#include <asm/smap.h>
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
- asm volatile("\t" ASM_STAC "\n" \
- "1:\t" insn "\n" \
- "2:\t" ASM_CLAC "\n" \
+#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \
+do { \
+ int oldval = 0, ret; \
+ asm volatile("1:\t" insn "\n" \
+ "2:\n" \
"\t.section .fixup,\"ax\"\n" \
"3:\tmov\t%3, %1\n" \
"\tjmp\t2b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
- : "i" (-EFAULT), "0" (oparg), "1" (0))
+ : "i" (-EFAULT), "0" (oparg), "1" (0)); \
+ if (ret) \
+ goto label; \
+ *oval = oldval; \
+} while(0)
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
- asm volatile("\t" ASM_STAC "\n" \
- "1:\tmovl %2, %0\n" \
- "\tmovl\t%0, %3\n" \
+
+#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \
+do { \
+ int oldval = 0, ret, tem; \
+ asm volatile("1:\tmovl %2, %0\n" \
+ "2:\tmovl\t%0, %3\n" \
"\t" insn "\n" \
- "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
- "\tjnz\t1b\n" \
- "3:\t" ASM_CLAC "\n" \
+ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
+ "\tjnz\t2b\n" \
+ "4:\n" \
"\t.section .fixup,\"ax\"\n" \
- "4:\tmov\t%5, %1\n" \
- "\tjmp\t3b\n" \
+ "5:\tmov\t%5, %1\n" \
+ "\tjmp\t4b\n" \
"\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 4b) \
- _ASM_EXTABLE_UA(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 5b) \
+ _ASM_EXTABLE_UA(3b, 5b) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "1" (0))
+ : "r" (oparg), "i" (-EFAULT), "1" (0)); \
+ if (ret) \
+ goto label; \
+ *oval = oldval; \
+} while(0)
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
- int oldval = 0, ret, tem;
-
- pagefault_disable();
+ if (!user_access_begin(uaddr, sizeof(u32)))
+ return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
+ unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
- uaddr, oparg);
+ unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval,
+ uaddr, oparg, Efault);
break;
case FUTEX_OP_OR:
- __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
+ unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
+ unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
+ unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault);
break;
default:
- ret = -ENOSYS;
+ user_access_end();
+ return -ENOSYS;
}
-
- pagefault_enable();
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ user_access_end();
+ return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
}
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
- return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval);
+ int ret = 0;
+
+ if (!user_access_begin(uaddr, sizeof(u32)))
+ return -EFAULT;
+ asm volatile("\n"
+ "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
+ "2:\n"
+ "\t.section .fixup, \"ax\"\n"
+ "3:\tmov %3, %0\n"
+ "\tjmp 2b\n"
+ "\t.previous\n"
+ _ASM_EXTABLE_UA(1b, 3b)
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+ : "i" (-EFAULT), "r" (newval), "1" (oldval)
+ : "memory"
+ );
+ user_access_end();
+ *uval = oldval;
+ return ret;
}
#endif
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 4981c293f926..8f1e94f29a16 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -35,6 +35,9 @@
* The #define line may optionally include a comment including platform names.
*/
+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
+#define INTEL_FAM6_ANY X86_MODEL_ANY
+
#define INTEL_FAM6_CORE_YONAH 0x0E
#define INTEL_FAM6_CORE2_MEROM 0x0F
@@ -118,17 +121,7 @@
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
-/* Useful macros */
-#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
-{ \
- .vendor = X86_VENDOR_INTEL, \
- .family = _family, \
- .model = _model, \
- .feature = X86_FEATURE_ANY, \
- .driver_data = (kernel_ulong_t)&_driver_data \
-}
-
-#define INTEL_CPU_FAM6(_model, _driver_data) \
- INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
+/* Family 5 */
+#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 02c6ef8f7667..07344d82e88e 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -19,7 +19,14 @@ struct task_struct;
void io_bitmap_share(struct task_struct *tsk);
void io_bitmap_exit(void);
-void tss_update_io_bitmap(void);
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#endif
+
#else
static inline void io_bitmap_share(struct task_struct *tsk) { }
static inline void io_bitmap_exit(void) { }
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index a176f6165d85..72fba0eeeb30 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,7 +36,7 @@ extern void native_init_IRQ(void);
extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs);
-extern __visible unsigned int do_IRQ(struct pt_regs *regs);
+extern __visible void do_IRQ(struct pt_regs *regs);
extern void init_ISA_irqs(void);
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 95b1f053bd96..073eb7ad2f56 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -36,6 +36,7 @@ typedef u8 kprobe_opcode_t;
/* optinsn template addresses */
extern __visible kprobe_opcode_t optprobe_template_entry[];
+extern __visible kprobe_opcode_t optprobe_template_clac[];
extern __visible kprobe_opcode_t optprobe_template_val[];
extern __visible kprobe_opcode_t optprobe_template_call[];
extern __visible kprobe_opcode_t optprobe_template_end[];
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 03946eb3e2b9..c06e8353efd3 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -292,6 +292,14 @@ enum x86emul_mode {
#define X86EMUL_SMM_MASK (1 << 6)
#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
+/*
+ * fastop functions are declared as taking a never-defined fastop parameter,
+ * so they can't be called from C directly.
+ */
+struct fastop;
+
+typedef void (*fastop_t)(struct fastop *);
+
struct x86_emulate_ctxt {
const struct x86_emulate_ops *ops;
@@ -324,7 +332,10 @@ struct x86_emulate_ctxt {
struct operand src;
struct operand src2;
struct operand dst;
- int (*execute)(struct x86_emulate_ctxt *ctxt);
+ union {
+ int (*execute)(struct x86_emulate_ctxt *ctxt);
+ fastop_t fop;
+ };
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
/*
* The following six fields are cleared together,
@@ -349,7 +360,6 @@ struct x86_emulate_ctxt {
u64 d;
unsigned long _eip;
struct operand memop;
- /* Fields above regs are cleared together. */
unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
struct fetch_cache fetch;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4dffbc10d3f8..98959e8cd448 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -781,9 +781,19 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
/*
- * Indicate whether the access faults on its page table in guest
- * which is set when fix page fault and used to detect unhandeable
- * instruction.
+ * Indicates the guest is trying to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself, i.e. the access
+ * is changing its own translation in the guest page tables. KVM exits
+ * to userspace if emulation of the faulting instruction fails and this
+ * flag is set, as KVM cannot make forward progress.
+ *
+ * If emulation fails for a write to guest page tables, KVM unprotects
+ * (zaps) the shadow page for the target gfn and resumes the guest to
+ * retry the non-emulatable instruction (on hardware). Unprotecting the
+ * gfn doesn't allow forward progress for a self-changing access because
+ * doing so also zaps the translation for the gfn, i.e. retrying the
+ * instruction will hit a !PRESENT fault, which results in a new shadow
+ * page and sends KVM back to square one.
*/
bool write_fault_to_shadow_pgtable;
@@ -1112,6 +1122,7 @@ struct kvm_x86_ops {
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1136,7 +1147,7 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4359b955e0b7..f9cea081c05b 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,7 +102,7 @@
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
-#define MCE_LOG_LEN 32
+#define MCE_LOG_MIN_LEN 32U
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
@@ -135,11 +135,11 @@
*/
struct mce_log_buffer {
char signature[12]; /* "MACHINECHECK" */
- unsigned len; /* = MCE_LOG_LEN */
+ unsigned len; /* = elements in .mce_entry[] */
unsigned next;
unsigned flags;
unsigned recordlen; /* length of struct mce */
- struct mce entry[MCE_LOG_LEN];
+ struct mce entry[];
};
enum mce_notifier_prios {
@@ -238,9 +238,6 @@ extern void mce_disable_bank(int bank);
/*
* Exception handler
*/
-
-/* Call the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
void do_machine_check(struct pt_regs *, long);
/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 6b79515abb82..edc2c581704a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -46,7 +46,9 @@ typedef int (*hyperv_fill_flush_list_func)(
#define hv_set_reference_tsc(val) \
wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
#define hv_set_clocksource_vdso(val) \
- ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
+ ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK)
+#define hv_enable_vdso_clocksource() \
+ vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
#define hv_get_raw_timer() rdtsc_ordered()
void hyperv_callback_vector(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ebe1685e92dd..12c9684d59ba 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,10 @@
/* Intel MSRs. Some also available on other CPUs */
+#define MSR_TEST_CTRL 0x00000033
+#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
+#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
+
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
@@ -70,6 +74,11 @@
*/
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
+/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
+#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
+#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
+
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
@@ -512,6 +521,8 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT 30
+#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 9d5252c9685c..b809f117f3f4 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -23,6 +23,8 @@
#define MWAITX_MAX_LOOPS ((u32)-1)
#define MWAITX_DISABLE_CSTATES 0xf0
+u32 get_umwait_control_msr(void);
+
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
{
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 86e7317eb31f..694d8daf4983 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_update_io_bitmap(void)
+{
+ PVOP_VCALL0(cpu.update_io_bitmap);
+}
+#endif
+
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 84812964d3dd..732f62e04ddb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,6 +140,10 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
+#ifdef CONFIG_X86_IOPL_IOPERM
+ void (*update_io_bitmap)(void);
+#endif
+
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 29964b0e1075..e855e9cf2c37 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -50,11 +50,22 @@
#define AMD64_L3_SLICE_SHIFT 48
#define AMD64_L3_SLICE_MASK \
- ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+ (0xFULL << AMD64_L3_SLICE_SHIFT)
+#define AMD64_L3_SLICEID_MASK \
+ (0x7ULL << AMD64_L3_SLICE_SHIFT)
#define AMD64_L3_THREAD_SHIFT 56
#define AMD64_L3_THREAD_MASK \
- ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+ (0xFFULL << AMD64_L3_THREAD_SHIFT)
+#define AMD64_L3_F19H_THREAD_MASK \
+ (0x3ULL << AMD64_L3_THREAD_SHIFT)
+
+#define AMD64_L3_EN_ALL_CORES BIT_ULL(47)
+#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46)
+
+#define AMD64_L3_COREID_SHIFT 42
+#define AMD64_L3_COREID_MASK \
+ (0x7ULL << AMD64_L3_COREID_SHIFT)
#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7e118660bbd9..afda66a6d325 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -595,12 +595,6 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
-static inline pud_t pud_mknotpresent(pud_t pud)
-{
- return pfn_pud(pud_pfn(pud),
- __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
-}
-
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -627,12 +621,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return __pmd(val);
}
-/* mprotect needs to preserve PAT bits when updating vm_page_prot */
+/*
+ * mprotect needs to preserve PAT and encryption bits when updating
+ * vm_page_prot
+ */
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
- pgprotval_t addbits = pgprot_val(newprot);
+ pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK;
return __pgprot(preservebits | addbits);
}
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0239998d8cdc..65c2ecd730c5 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -118,7 +118,7 @@
*/
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
/*
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 19b137f1b3be..2ff9b98812b7 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -4,6 +4,11 @@
#define ARCH_DEFAULT_PKEY 0
+/*
+ * If more than 16 keys are ever supported, a thorough audit
+ * will be necessary to ensure that the types that store key
+ * numbers and masks have sufficient capacity.
+ */
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 3d4cb83a8828..69485ca13665 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -103,14 +103,14 @@ static __always_inline bool should_resched(int preempt_offset)
}
#ifdef CONFIG_PREEMPTION
- extern asmlinkage void ___preempt_schedule(void);
+ extern asmlinkage void preempt_schedule_thunk(void);
# define __preempt_schedule() \
- asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
+ asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT)
extern asmlinkage void preempt_schedule(void);
- extern asmlinkage void ___preempt_schedule_notrace(void);
+ extern asmlinkage void preempt_schedule_notrace_thunk(void);
# define __preempt_schedule_notrace() \
- asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+ asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT)
extern asmlinkage void preempt_schedule_notrace(void);
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 09705ccc393c..3bcf27caf6c9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -26,6 +26,7 @@ struct vm86;
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
#include <asm/vmxfeatures.h>
+#include <asm/vdso/processor.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -541,7 +542,6 @@ struct thread_struct {
mm_segment_t addr_limit;
unsigned int sig_on_uaccess_err:1;
- unsigned int uaccess_err:1; /* uaccess failed */
/* Floating point and extended processor state */
struct fpu fpu;
@@ -677,17 +677,6 @@ static inline unsigned int cpuid_edx(unsigned int op)
return edx;
}
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static __always_inline void rep_nop(void)
-{
- asm volatile("rep; nop" ::: "memory");
-}
-
-static __always_inline void cpu_relax(void)
-{
- rep_nop();
-}
-
/*
* This function forces the icache and prefetched instruction stream to
* catch up with reality in two very specific cases:
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 036c360910c5..a6e8373a5170 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_SECTIONS_H
#define _ASM_X86_SECTIONS_H
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
#include <asm-generic/sections.h>
#include <asm/extable.h>
@@ -14,4 +16,22 @@ extern char __end_rodata_hpage_align[];
extern char __end_of_kernel_reserve[];
+extern unsigned long _brk_start, _brk_end;
+
+static inline bool arch_is_kernel_initmem_freed(unsigned long addr)
+{
+ /*
+ * If _brk_start has not been cleared, brk allocation is incomplete,
+ * and we can not make assumptions about its use.
+ */
+ if (_brk_start)
+ return 0;
+
+ /*
+ * After brk allocation is complete, space between _brk_end and _end
+ * is available for allocation.
+ */
+ return addr >= _brk_end && addr < (unsigned long)&_end;
+}
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 64c3dce374e5..950532ccbc4a 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -46,6 +46,8 @@ int set_memory_4k(unsigned long addr, int numpages);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
int set_memory_np_noalias(unsigned long addr, int numpages);
+int set_memory_nonglobal(unsigned long addr, int numpages);
+int set_memory_global(unsigned long addr, int numpages);
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index f176114c04d4..84eab2724875 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -33,11 +33,7 @@ struct sigframe_ia32 {
* legacy application accessing/modifying it.
*/
struct _fpstate_32 fpstate_unused;
-#ifdef CONFIG_IA32_EMULATION
- unsigned int extramask[_COMPAT_NSIG_WORDS-1];
-#else /* !CONFIG_IA32_EMULATION */
- unsigned long extramask[_NSIG_WORDS-1];
-#endif /* CONFIG_IA32_EMULATION */
+ unsigned int extramask[1];
char retcode[8];
/* fp state follows here */
};
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 2fcbd6f33ef7..65e667279e0f 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -14,12 +14,5 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
- struct pt_regs *regs, unsigned long mask);
-
-
-#ifdef CONFIG_X86_X32_ABI
-asmlinkage long sys32_x32_rt_sigreturn(void);
-#endif
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8db3fdb6102e..7cbf733d11af 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,23 +13,14 @@
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
-#include <asm/asm-offsets.h> /* For NR_syscalls */
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
-#ifdef CONFIG_X86_64
-typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
-#else
-typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
- unsigned long, unsigned long,
- unsigned long, unsigned long);
-#endif /* CONFIG_X86_64 */
+typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
#if defined(CONFIG_X86_32)
#define ia32_sys_call_table sys_call_table
-#define __NR_syscall_compat_max __NR_syscall_max
-#define IA32_NR_syscalls NR_syscalls
#endif
#if defined(CONFIG_IA32_EMULATION)
@@ -168,6 +159,11 @@ static inline int syscall_get_arch(struct task_struct *task)
task->thread_info.status & TS_COMPAT)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
+
+void do_syscall_64(unsigned long nr, struct pt_regs *regs);
+void do_int80_syscall_32(struct pt_regs *regs);
+long do_fast_syscall_32(struct pt_regs *regs);
+
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALL_H */
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e2389ce9bf58..a84333adeef2 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -8,6 +8,50 @@
struct pt_regs;
+extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
+extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
+
+/*
+ * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes
+ * struct pt_regs *regs as the only argument of the syscall stub(s) named as:
+ * __x64_sys_*() - 64-bit native syscall
+ * __ia32_sys_*() - 32-bit native syscall or common compat syscall
+ * __ia32_compat_sys_*() - 32-bit compat syscall
+ * __x32_compat_sys_*() - 64-bit X32 compat syscall
+ *
+ * The registers are decoded according to the ABI:
+ * 64-bit: RDI, RSI, RDX, R10, R8, R9
+ * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP
+ *
+ * The stub then passes the decoded arguments to the __se_sys_*() wrapper to
+ * perform sign-extension (omitted for zero-argument syscalls). Finally the
+ * arguments are passed to the __do_sys_*() function which is the actual
+ * syscall. These wrappers are marked as inline so the compiler can optimize
+ * the functions where appropriate.
+ *
+ * Example assembly (slightly re-ordered for better readability):
+ *
+ * <__x64_sys_recv>: <-- syscall with 4 parameters
+ * callq <__fentry__>
+ *
+ * mov 0x70(%rdi),%rdi <-- decode regs->di
+ * mov 0x68(%rdi),%rsi <-- decode regs->si
+ * mov 0x60(%rdi),%rdx <-- decode regs->dx
+ * mov 0x38(%rdi),%rcx <-- decode regs->r10
+ *
+ * xor %r9d,%r9d <-- clear %r9
+ * xor %r8d,%r8d <-- clear %r8
+ *
+ * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
+ * which takes 6 arguments
+ *
+ * cltq <-- extend return value to 64-bit
+ * retq <-- return
+ *
+ * This approach avoids leaking random user-provided register content down
+ * the call chain.
+ */
+
/* Mapping of registers to parameters for syscalls on x86-64 and x32 */
#define SC_X86_64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
@@ -21,68 +65,96 @@ struct pt_regs;
,,(unsigned int)regs->dx,,(unsigned int)regs->si \
,,(unsigned int)regs->di,,(unsigned int)regs->bp)
-#ifdef CONFIG_IA32_EMULATION
-/*
- * For IA32 emulation, we need to handle "compat" syscalls *and* create
- * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
- * ia32 regs in the proper order for shared or "common" syscalls. As some
- * syscalls may not be implemented, we need to expand COND_SYSCALL in
- * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
- * case as well.
- */
-#define __IA32_COMPAT_SYS_STUB0(x, name) \
- asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \
- asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\
+#define __SYS_STUB0(abi, name) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
+ __alias(__do_##name);
+
+#define __SYS_STUBx(abi, name, ...) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
{ \
- return __se_compat_sys_##name(); \
+ return __se_##name(__VA_ARGS__); \
}
-#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
+#define __COND_SYSCALL(abi, name) \
+ __weak long __##abi##_##name(const struct pt_regs *__unused) \
{ \
- return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
+ return sys_ni_syscall(); \
}
+#define __SYS_NI(abi, name) \
+ SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers);
+
+#ifdef CONFIG_X86_64
+#define __X64_SYS_STUB0(name) \
+ __SYS_STUB0(x64, sys_##name)
+
+#define __X64_SYS_STUBx(x, name, ...) \
+ __SYS_STUBx(x64, sys##name, \
+ SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
+
+#define __X64_COND_SYSCALL(name) \
+ __COND_SYSCALL(x64, sys_##name)
+
+#define __X64_SYS_NI(name) \
+ __SYS_NI(x64, sys_##name)
+#else /* CONFIG_X86_64 */
+#define __X64_SYS_STUB0(name)
+#define __X64_SYS_STUBx(x, name, ...)
+#define __X64_COND_SYSCALL(name)
+#define __X64_SYS_NI(name)
+#endif /* CONFIG_X86_64 */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+#define __IA32_SYS_STUB0(name) \
+ __SYS_STUB0(ia32, sys_##name)
+
#define __IA32_SYS_STUBx(x, name, ...) \
- asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \
- asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \
- { \
- return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
- }
+ __SYS_STUBx(ia32, sys##name, \
+ SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__))
+
+#define __IA32_COND_SYSCALL(name) \
+ __COND_SYSCALL(ia32, sys_##name)
+#define __IA32_SYS_NI(name) \
+ __SYS_NI(ia32, sys_##name)
+#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+#define __IA32_SYS_STUB0(name)
+#define __IA32_SYS_STUBx(x, name, ...)
+#define __IA32_COND_SYSCALL(name)
+#define __IA32_SYS_NI(name)
+#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+
+#ifdef CONFIG_IA32_EMULATION
/*
- * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
- * named __ia32_sys_*()
+ * For IA32 emulation, we need to handle "compat" syscalls *and* create
+ * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
+ * ia32 regs in the proper order for shared or "common" syscalls. As some
+ * syscalls may not be implemented, we need to expand COND_SYSCALL in
+ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
+ * case as well.
*/
+#define __IA32_COMPAT_SYS_STUB0(name) \
+ __SYS_STUB0(ia32, compat_sys_##name)
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
- asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
+#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
+ __SYS_STUBx(ia32, compat_sys##name, \
+ SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__))
-#define COND_SYSCALL(name) \
- asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \
- { \
- return sys_ni_syscall(); \
- } \
- asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\
- { \
- return sys_ni_syscall(); \
- }
+#define __IA32_COMPAT_COND_SYSCALL(name) \
+ __COND_SYSCALL(ia32, compat_sys_##name)
-#define SYS_NI(name) \
- SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \
- SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers)
+#define __IA32_COMPAT_SYS_NI(name) \
+ __SYS_NI(ia32, compat_sys_##name)
#else /* CONFIG_IA32_EMULATION */
+#define __IA32_COMPAT_SYS_STUB0(name)
#define __IA32_COMPAT_SYS_STUBx(x, name, ...)
-#define __IA32_SYS_STUBx(x, fullname, name, ...)
+#define __IA32_COMPAT_COND_SYSCALL(name)
+#define __IA32_COMPAT_SYS_NI(name)
#endif /* CONFIG_IA32_EMULATION */
@@ -92,25 +164,23 @@ struct pt_regs;
* of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
* with x86_64 obviously do not need such care.
*/
-#define __X32_COMPAT_SYS_STUB0(x, name, ...) \
- asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \
- asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\
- { \
- return __se_compat_sys_##name();\
- }
+#define __X32_COMPAT_SYS_STUB0(name) \
+ __SYS_STUB0(x32, compat_sys_##name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
- asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \
- asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
- { \
- return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
- }
+ __SYS_STUBx(x32, compat_sys##name, \
+ SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
+
+#define __X32_COMPAT_COND_SYSCALL(name) \
+ __COND_SYSCALL(x32, compat_sys_##name)
+#define __X32_COMPAT_SYS_NI(name) \
+ __SYS_NI(x32, compat_sys_##name)
#else /* CONFIG_X86_X32 */
-#define __X32_COMPAT_SYS_STUB0(x, name)
+#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
+#define __X32_COMPAT_COND_SYSCALL(name)
+#define __X32_COMPAT_SYS_NI(name)
#endif /* CONFIG_X86_X32 */
@@ -121,15 +191,12 @@ struct pt_regs;
* of them.
*/
#define COMPAT_SYSCALL_DEFINE0(name) \
- static long __se_compat_sys_##name(void); \
- static inline long __do_compat_sys_##name(void); \
- __IA32_COMPAT_SYS_STUB0(x, name) \
- __X32_COMPAT_SYS_STUB0(x, name) \
- static long __se_compat_sys_##name(void) \
- { \
- return __do_compat_sys_##name(); \
- } \
- static inline long __do_compat_sys_##name(void)
+ static long \
+ __do_compat_sys_##name(const struct pt_regs *__unused); \
+ __IA32_COMPAT_SYS_STUB0(name) \
+ __X32_COMPAT_SYS_STUB0(name) \
+ static long \
+ __do_compat_sys_##name(const struct pt_regs *__unused)
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
@@ -148,58 +215,19 @@ struct pt_regs;
* kernel/time/posix-stubs.c to cover this case as well.
*/
#define COND_SYSCALL_COMPAT(name) \
- cond_syscall(__ia32_compat_sys_##name); \
- cond_syscall(__x32_compat_sys_##name)
+ __IA32_COMPAT_COND_SYSCALL(name) \
+ __X32_COMPAT_COND_SYSCALL(name)
#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \
- SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers)
+ __IA32_COMPAT_SYS_NI(name) \
+ __X32_COMPAT_SYS_NI(name)
#endif /* CONFIG_COMPAT */
-
-/*
- * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes
- * struct pt_regs *regs as the only argument of the syscall stub named
- * __x64_sys_*(). It decodes just the registers it needs and passes them on to
- * the __se_sys_*() wrapper performing sign extension and then to the
- * __do_sys_*() function doing the actual job. These wrappers and functions
- * are inlined (at least in very most cases), meaning that the assembly looks
- * as follows (slightly re-ordered for better readability):
- *
- * <__x64_sys_recv>: <-- syscall with 4 parameters
- * callq <__fentry__>
- *
- * mov 0x70(%rdi),%rdi <-- decode regs->di
- * mov 0x68(%rdi),%rsi <-- decode regs->si
- * mov 0x60(%rdi),%rdx <-- decode regs->dx
- * mov 0x38(%rdi),%rcx <-- decode regs->r10
- *
- * xor %r9d,%r9d <-- clear %r9
- * xor %r8d,%r8d <-- clear %r8
- *
- * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
- * which takes 6 arguments
- *
- * cltq <-- extend return value to 64-bit
- * retq <-- return
- *
- * This approach avoids leaking random user-provided register content down
- * the call chain.
- *
- * If IA32_EMULATION is enabled, this macro generates an additional wrapper
- * named __ia32_sys_*() which decodes the struct pt_regs *regs according
- * to the i386 calling convention (bx, cx, dx, si, di, bp).
- */
#define __SYSCALL_DEFINEx(x, name, ...) \
- asmlinkage long __x64_sys##name(const struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
- asmlinkage long __x64_sys##name(const struct pt_regs *regs) \
- { \
- return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ __X64_SYS_STUBx(x, name, __VA_ARGS__) \
__IA32_SYS_STUBx(x, name, __VA_ARGS__) \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
@@ -217,33 +245,28 @@ struct pt_regs;
* SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI()
* macros to work correctly.
*/
-#ifndef SYSCALL_DEFINE0
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
-#endif
-
-#ifndef COND_SYSCALL
-#define COND_SYSCALL(name) \
- asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \
- { \
- return sys_ni_syscall(); \
- }
-#endif
+ static long __do_sys_##sname(const struct pt_regs *__unused); \
+ __X64_SYS_STUB0(sname) \
+ __IA32_SYS_STUB0(sname) \
+ static long __do_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name) \
+ __X64_COND_SYSCALL(name) \
+ __IA32_COND_SYSCALL(name)
-#ifndef SYS_NI
-#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers);
-#endif
+#define SYS_NI(name) \
+ __X64_SYS_NI(name) \
+ __IA32_SYS_NI(name)
/*
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
-asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
-asmlinkage long __x64_sys_time(const struct pt_regs *regs);
+long __x64_sys_getcpu(const struct pt_regs *regs);
+long __x64_sys_gettimeofday(const struct pt_regs *regs);
+long __x64_sys_time(const struct pt_regs *regs);
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91b7b6e1a115..6714a358235d 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -8,42 +8,8 @@
#ifndef _ASM_X86_SYSCALLS_H
#define _ASM_X86_SYSCALLS_H
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <linux/signal.h>
-#include <linux/types.h>
-
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
long ksys_ioperm(unsigned long from, unsigned long num, int turn_on);
-#ifdef CONFIG_X86_32
-/*
- * These definitions are only valid on pure 32-bit systems; x86-64 uses a
- * different syscall calling convention
- */
-asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
-asmlinkage long sys_iopl(unsigned int);
-
-/* kernel/ldt.c */
-asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
-
-/* kernel/signal.c */
-asmlinkage long sys_rt_sigreturn(void);
-
-/* kernel/tls.c */
-asmlinkage long sys_set_thread_area(struct user_desc __user *);
-asmlinkage long sys_get_thread_area(struct user_desc __user *);
-
-/* X86_32 only */
-
-/* kernel/signal.c */
-asmlinkage long sys_sigreturn(void);
-
-/* kernel/vm86_32.c */
-struct vm86_struct;
-asmlinkage long sys_vm86old(struct vm86_struct __user *);
-asmlinkage long sys_vm86(unsigned long, unsigned long);
-
-#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALLS_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index cf4327986e98..8de8ceccb8bc 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,7 +92,7 @@ struct thread_info {
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
-#define TIF_NOHZ 19 /* in adaptive nohz mode */
+#define TIF_SLD 18 /* Restore split lock detection on context switch */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -122,7 +122,7 @@ struct thread_info {
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
-#define _TIF_NOHZ (1 << TIF_NOHZ)
+#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
@@ -133,19 +133,15 @@ struct thread_info {
#define _TIF_X32 (1 << TIF_X32)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
-/*
- * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
- * enter_from_user_mode()
- */
+/* Work to do before invoking the actual syscall. */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
- _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
+ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
/*
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4b14d2318251..79d8d5496330 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -193,4 +193,29 @@ static inline void sched_clear_itmt_support(void)
}
#endif /* CONFIG_SCHED_MC_PRIO */
+#ifdef CONFIG_SMP
+#include <asm/cpufeature.h>
+
+DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+#define arch_scale_freq_invariant() static_branch_likely(&arch_scale_freq_key)
+
+DECLARE_PER_CPU(unsigned long, arch_freq_scale);
+
+static inline long arch_scale_freq_capacity(int cpu)
+{
+ return per_cpu(arch_freq_scale, cpu);
+}
+#define arch_scale_freq_capacity arch_scale_freq_capacity
+
+extern void arch_scale_freq_tick(void);
+#define arch_scale_freq_tick arch_scale_freq_tick
+
+extern void arch_set_max_freq_ratio(bool turbo_disabled);
+#else
+static inline void arch_set_max_freq_ratio(bool turbo_disabled)
+{
+}
+#endif
+
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index ffa0dc8a535e..c26a7e1d8a2c 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -76,27 +76,24 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long err
dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
-#ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
-asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
-asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
-void __init trap_init(void);
-#endif
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
-#ifdef CONFIG_X86_MCE
-dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code);
-#endif
dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code);
#endif
dotraplinkage void do_mce(struct pt_regs *regs, long error_code);
+#ifdef CONFIG_X86_64
+asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
+asmlinkage __visible notrace
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
+void __init trap_init(void);
+#endif
+
static inline int get_si_code(unsigned long condition)
{
if (condition & DR_STEP)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 61d93f062a36..c8247a84244b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -193,23 +193,12 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
: : "A" (x), "r" (addr) \
: : label)
-#define __put_user_asm_ex_u64(x, addr) \
- asm volatile("\n" \
- "1: movl %%eax,0(%1)\n" \
- "2: movl %%edx,4(%1)\n" \
- "3:" \
- _ASM_EXTABLE_EX(1b, 2b) \
- _ASM_EXTABLE_EX(2b, 3b) \
- : : "A" (x), "r" (addr))
-
#define __put_user_x8(x, ptr, __ret_pu) \
asm volatile("call __put_user_8" : "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
#define __put_user_goto_u64(x, ptr, label) \
__put_user_goto(x, ptr, "q", "", "er", label)
-#define __put_user_asm_ex_u64(x, addr) \
- __put_user_asm_ex(x, addr, "q", "", "er")
#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
#endif
@@ -289,31 +278,6 @@ do { \
} \
} while (0)
-/*
- * This doesn't do __uaccess_begin/end - the exception handling
- * around it must do that.
- */
-#define __put_user_size_ex(x, ptr, size) \
-do { \
- __chk_user_ptr(ptr); \
- switch (size) { \
- case 1: \
- __put_user_asm_ex(x, ptr, "b", "b", "iq"); \
- break; \
- case 2: \
- __put_user_asm_ex(x, ptr, "w", "w", "ir"); \
- break; \
- case 4: \
- __put_user_asm_ex(x, ptr, "l", "k", "ir"); \
- break; \
- case 8: \
- __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \
- break; \
- default: \
- __put_user_bad(); \
- } \
-} while (0)
-
#ifdef CONFIG_X86_32
#define __get_user_asm_u64(x, ptr, retval, errret) \
({ \
@@ -335,12 +299,9 @@ do { \
"i" (errret), "0" (retval)); \
})
-#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
#else
#define __get_user_asm_u64(x, ptr, retval, errret) \
__get_user_asm(x, ptr, retval, "q", "", "=r", errret)
-#define __get_user_asm_ex_u64(x, ptr) \
- __get_user_asm_ex(x, ptr, "q", "", "=r")
#endif
#define __get_user_size(x, ptr, size, retval, errret) \
@@ -378,53 +339,6 @@ do { \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
-#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \
- asm volatile("\n" \
- "1: mov"itype" %2,%"rtype"1\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "=r" (err), ltype(x) \
- : "m" (__m(addr)), "i" (errret), "0" (err))
-
-/*
- * This doesn't do __uaccess_begin/end - the exception handling
- * around it must do that.
- */
-#define __get_user_size_ex(x, ptr, size) \
-do { \
- __chk_user_ptr(ptr); \
- switch (size) { \
- case 1: \
- __get_user_asm_ex(x, ptr, "b", "b", "=q"); \
- break; \
- case 2: \
- __get_user_asm_ex(x, ptr, "w", "w", "=r"); \
- break; \
- case 4: \
- __get_user_asm_ex(x, ptr, "l", "k", "=r"); \
- break; \
- case 8: \
- __get_user_asm_ex_u64(x, ptr); \
- break; \
- default: \
- (x) = __get_user_bad(); \
- } \
-} while (0)
-
-#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
- asm volatile("1: mov"itype" %1,%"rtype"0\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:xor"itype" %"rtype"0,%"rtype"0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_EX(1b, 3b) \
- : ltype(x) : "m" (__m(addr)))
-
#define __put_user_nocheck(x, ptr, size) \
({ \
__label__ __pu_label; \
@@ -480,29 +394,6 @@ struct __large_struct { unsigned long buf[100]; };
retval = __put_user_failed(x, addr, itype, rtype, ltype, errret); \
} while (0)
-#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
- asm volatile("1: mov"itype" %"rtype"0,%1\n" \
- "2:\n" \
- _ASM_EXTABLE_EX(1b, 2b) \
- : : ltype(x), "m" (__m(addr)))
-
-/*
- * uaccess_try and catch
- */
-#define uaccess_try do { \
- current->thread.uaccess_err = 0; \
- __uaccess_begin(); \
- barrier();
-
-#define uaccess_try_nospec do { \
- current->thread.uaccess_err = 0; \
- __uaccess_begin_nospec(); \
-
-#define uaccess_catch(err) \
- __uaccess_end(); \
- (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
-} while (0)
-
/**
* __get_user - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
@@ -552,28 +443,6 @@ struct __large_struct { unsigned long buf[100]; };
#define __put_user(x, ptr) \
__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-/*
- * {get|put}_user_try and catch
- *
- * get_user_try {
- * get_user_ex(...);
- * } get_user_catch(err)
- */
-#define get_user_try uaccess_try_nospec
-#define get_user_catch(err) uaccess_catch(err)
-
-#define get_user_ex(x, ptr) do { \
- unsigned long __gue_val; \
- __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \
- (x) = (__force __typeof__(*(ptr)))__gue_val; \
-} while (0)
-
-#define put_user_try uaccess_try
-#define put_user_catch(err) uaccess_catch(err)
-
-#define put_user_ex(x, ptr) \
- __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
extern unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
extern __must_check long
@@ -584,99 +453,6 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
-extern void __cmpxchg_wrong_size(void)
- __compiletime_error("Bad argument size for cmpxchg");
-
-#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \
-({ \
- int __ret = 0; \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __uaccess_begin_nospec(); \
- switch (size) { \
- case 1: \
- { \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "q" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- case 2: \
- { \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "r" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- case 4: \
- { \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "r" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- case 8: \
- { \
- if (!IS_ENABLED(CONFIG_X86_64)) \
- __cmpxchg_wrong_size(); \
- \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "r" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- default: \
- __cmpxchg_wrong_size(); \
- } \
- __uaccess_end(); \
- *(uval) = __old; \
- __ret; \
-})
-
-#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \
-({ \
- access_ok((ptr), sizeof(*(ptr))) ? \
- __user_atomic_cmpxchg_inatomic((uval), (ptr), \
- (old), (new), sizeof(*(ptr))) : \
- -EFAULT; \
-})
-
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
@@ -695,15 +471,6 @@ extern struct movsl_mask {
#endif
/*
- * We rely on the nested NMI work to allow atomic faults from the NMI path; the
- * nested NMI paths are careful to preserve CR2.
- *
- * Caller must use pagefault_enable/disable, or run in interrupt context,
- * and also do a uaccess_ok() check
- */
-#define __copy_from_user_nmi __copy_from_user_inatomic
-
-/*
* The "unsafe" user accesses aren't really "unsafe", but the naming
* is a big fat warning: you have to not only do the access_ok()
* checking before using them, but you have to surround them with the
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index ba2dc1930630..388a40660c7b 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- ret = 0;
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u8 *)to, from, ret,
- "b", "b", "=q", 1);
- __uaccess_end();
- return ret;
- case 2:
- ret = 0;
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u16 *)to, from, ret,
- "w", "w", "=r", 2);
- __uaccess_end();
- return ret;
- case 4:
- ret = 0;
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u32 *)to, from, ret,
- "l", "k", "=r", 4);
- __uaccess_end();
- return ret;
- }
- }
return __copy_user_ll(to, (__force const void *)from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 5cd1caa8bc65..bc10e3dc64fe 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -65,117 +65,13 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len)
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
- int ret = 0;
-
- if (!__builtin_constant_p(size))
- return copy_user_generic(dst, (__force void *)src, size);
- switch (size) {
- case 1:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
- ret, "b", "b", "=q", 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
- ret, "w", "w", "=r", 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
- ret, "l", "k", "=r", 4);
- __uaccess_end();
- return ret;
- case 8:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 8);
- __uaccess_end();
- return ret;
- case 10:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 10);
- if (likely(!ret))
- __get_user_asm_nozero(*(u16 *)(8 + (char *)dst),
- (u16 __user *)(8 + (char __user *)src),
- ret, "w", "w", "=r", 2);
- __uaccess_end();
- return ret;
- case 16:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 16);
- if (likely(!ret))
- __get_user_asm_nozero(*(u64 *)(8 + (char *)dst),
- (u64 __user *)(8 + (char __user *)src),
- ret, "q", "", "=r", 8);
- __uaccess_end();
- return ret;
- default:
- return copy_user_generic(dst, (__force void *)src, size);
- }
+ return copy_user_generic(dst, (__force void *)src, size);
}
static __always_inline __must_check unsigned long
raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
{
- int ret = 0;
-
- if (!__builtin_constant_p(size))
- return copy_user_generic((__force void *)dst, src, size);
- switch (size) {
- case 1:
- __uaccess_begin();
- __put_user_asm(*(u8 *)src, (u8 __user *)dst,
- ret, "b", "b", "iq", 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin();
- __put_user_asm(*(u16 *)src, (u16 __user *)dst,
- ret, "w", "w", "ir", 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin();
- __put_user_asm(*(u32 *)src, (u32 __user *)dst,
- ret, "l", "k", "ir", 4);
- __uaccess_end();
- return ret;
- case 8:
- __uaccess_begin();
- __put_user_asm(*(u64 *)src, (u64 __user *)dst,
- ret, "q", "", "er", 8);
- __uaccess_end();
- return ret;
- case 10:
- __uaccess_begin();
- __put_user_asm(*(u64 *)src, (u64 __user *)dst,
- ret, "q", "", "er", 10);
- if (likely(!ret)) {
- asm("":::"memory");
- __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
- ret, "w", "w", "ir", 2);
- }
- __uaccess_end();
- return ret;
- case 16:
- __uaccess_begin();
- __put_user_asm(*(u64 *)src, (u64 __user *)dst,
- ret, "q", "", "er", 16);
- if (likely(!ret)) {
- asm("":::"memory");
- __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
- ret, "q", "", "er", 8);
- }
- __uaccess_end();
- return ret;
- default:
- return copy_user_generic((__force void *)dst, src, size);
- }
+ return copy_user_generic((__force void *)dst, src, size);
}
static __always_inline __must_check
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index a7dd080749ce..c1c3d31b15c0 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -13,10 +13,13 @@
# define __ARCH_WANT_SYS_OLD_MMAP
# define __ARCH_WANT_SYS_OLD_SELECT
+# define __NR_ia32_syscall_max __NR_syscall_max
+
# else
# include <asm/unistd_64.h>
# include <asm/unistd_64_x32.h>
+# include <asm/unistd_32_ia32.h>
# define __ARCH_WANT_SYS_TIME
# define __ARCH_WANT_SYS_UTIME
# define __ARCH_WANT_COMPAT_SYS_PREADV64
@@ -26,6 +29,10 @@
# endif
+# define NR_syscalls (__NR_syscall_max + 1)
+# define X32_NR_syscalls (__NR_x32_syscall_max + 1)
+# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
+
# define __ARCH_WANT_NEW_STAT
# define __ARCH_WANT_OLD_READDIR
# define __ARCH_WANT_OLD_STAT
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 7803114aa140..13687bf0e0a9 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -858,4 +858,6 @@ static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
return 1;
}
+void uv_bau_message_interrupt(struct pt_regs *regs);
+
#endif /* _ASM_X86_UV_UV_BAU_H */
diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..119ac8612d89
--- /dev/null
+++ b/arch/x86/include/asm/vdso/clocksource.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_CLOCKSOURCE_H
+#define __ASM_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ VDSO_CLOCKMODE_TSC, \
+ VDSO_CLOCKMODE_PVCLOCK, \
+ VDSO_CLOCKMODE_HVCLOCK
+
+#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 6ee1f7dba34b..9a6dc9b4ec99 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -243,7 +243,7 @@ static u64 vread_hvclock(void)
static inline u64 __arch_get_hw_counter(s32 clock_mode)
{
- if (clock_mode == VCLOCK_TSC)
+ if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
return (u64)rdtsc_ordered();
/*
* For any memory-mapped vclock type, we need to make sure that gcc
@@ -252,13 +252,13 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode)
* question isn't enabled, which will segfault. Hence the barriers.
*/
#ifdef CONFIG_PARAVIRT_CLOCK
- if (clock_mode == VCLOCK_PVCLOCK) {
+ if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) {
barrier();
return vread_pvclock();
}
#endif
#ifdef CONFIG_HYPERV_TIMER
- if (clock_mode == VCLOCK_HVCLOCK) {
+ if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) {
barrier();
return vread_hvclock();
}
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..57b1a7034c64
--- /dev/null
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static __always_inline void rep_nop(void)
+{
+ asm volatile("rep; nop" ::: "memory");
+}
+
+static __always_inline void cpu_relax(void)
+{
+ rep_nop();
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index 0026ab2123ce..be199a9b2676 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -10,8 +10,6 @@
#include <asm/vgtod.h>
#include <asm/vvar.h>
-int vclocks_used __read_mostly;
-
DEFINE_VVAR(struct vdso_data, _vdso_data);
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
@@ -23,19 +21,6 @@ struct vdso_data *__x86_get_k_vdso_data(void)
}
#define __arch_get_k_vdso_data __x86_get_k_vdso_data
-static __always_inline
-int __x86_get_clock_mode(struct timekeeper *tk)
-{
- int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
-
- /* Mark the new vclock used. */
- BUILD_BUG_ON(VCLOCK_MAX >= 32);
- WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
-
- return vclock_mode;
-}
-#define __arch_get_clock_mode __x86_get_clock_mode
-
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index a2638c6124ed..7aa38b2ad8a9 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -2,6 +2,11 @@
#ifndef _ASM_X86_VGTOD_H
#define _ASM_X86_VGTOD_H
+/*
+ * This check is required to prevent ARCH=um to include
+ * unwanted headers.
+ */
+#ifdef CONFIG_GENERIC_GETTIMEOFDAY
#include <linux/compiler.h>
#include <asm/clocksource.h>
#include <vdso/datapage.h>
@@ -14,11 +19,6 @@ typedef u64 gtod_long_t;
#else
typedef unsigned long gtod_long_t;
#endif
-
-extern int vclocks_used;
-static inline bool vclock_was_used(int vclock)
-{
- return READ_ONCE(vclocks_used) & (1 << vclock);
-}
+#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2a85287b3685..8521af3fef27 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,7 +72,7 @@
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
-#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index a50e4a0de315..9915990fd8cf 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -81,6 +81,7 @@
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 503d3f42da16..3f3f780c8c65 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9b294c13809a..bb5abfef0256 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
-OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
@@ -53,6 +52,8 @@ obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
+obj-$(CONFIG_X86_32) += sys_ia32.o
+obj-$(CONFIG_IA32_EMULATION) += sys_ia32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 04205ce127a1..1ae5439a9a85 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -45,6 +45,7 @@ EXPORT_SYMBOL(acpi_disabled);
#define PREFIX "ACPI: "
int acpi_noirq; /* skip ACPI IRQ initialization */
+int acpi_nobgrt; /* skip ACPI BGRT */
int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
EXPORT_SYMBOL(acpi_pci_disabled);
@@ -1619,7 +1620,7 @@ int __init acpi_boot_init(void)
acpi_process_madt();
acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
- if (IS_ENABLED(CONFIG_ACPI_BGRT))
+ if (IS_ENABLED(CONFIG_ACPI_BGRT) && !acpi_nobgrt)
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
if (!acpi_noirq)
@@ -1671,6 +1672,13 @@ static int __init parse_acpi(char *arg)
}
early_param("acpi", parse_acpi);
+static int __init parse_acpi_bgrt(char *arg)
+{
+ acpi_nobgrt = true;
+ return 0;
+}
+early_param("bgrt_disable", parse_acpi_bgrt);
+
/* FIXME: Using pci= for an ACPI parameter is a travesty. */
static int __init parse_pci(char *arg)
{
@@ -1740,7 +1748,7 @@ int __acpi_acquire_global_lock(unsigned int *lock)
new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
val = cmpxchg(lock, old, new);
} while (unlikely (val != old));
- return (new < 3) ? -1 : 0;
+ return ((new & 0x3) < 3) ? -1 : 0;
}
int __acpi_release_global_lock(unsigned int *lock)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 26b7256f590f..ed3b04483972 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -43,7 +43,7 @@ unsigned long acpi_get_wakeup_address(void)
*
* Wrapper around acpi_enter_sleep_state() to be called by assmebly.
*/
-acpi_status asmlinkage __visible x86_acpi_enter_sleep_state(u8 state)
+asmlinkage acpi_status __visible x86_acpi_enter_sleep_state(u8 state)
{
return acpi_enter_sleep_state(state);
}
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index d06c2079b6c1..171a40c74db6 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -19,4 +19,4 @@ extern void do_suspend_lowlevel(void);
extern int x86_acpi_suspend_lowlevel(void);
-acpi_status asmlinkage x86_acpi_enter_sleep_state(u8 state);
+asmlinkage acpi_status x86_acpi_enter_sleep_state(u8 state);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 15ac0d5f4b40..7867dfb3963e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1167,8 +1167,8 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
atomic_cond_read_acquire(&desc.refs, !VAL);
}
-void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
- const void *opcode, size_t len, const void *emulate)
+static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate)
{
struct insn insn;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 69aed0ebbdfc..b6b3297851f3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -36,10 +36,9 @@ static const struct pci_device_id amd_root_ids[] = {
{}
};
-
#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704
-const struct pci_device_id amd_nb_misc_ids[] = {
+static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
@@ -56,7 +55,6 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{}
};
-EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5f973fed3c9f..81b9c63dae1b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -546,12 +546,6 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-#define DEADLINE_MODEL_MATCH_FUNC(model, func) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func }
-
-#define DEADLINE_MODEL_MATCH_REV(model, rev) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev }
-
static u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
@@ -588,23 +582,23 @@ static u32 skx_deadline_rev(void)
}
static const struct x86_cpu_id deadline_match[] = {
- DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
- DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev),
- DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
+ X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
+ X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
+ X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
+ X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17),
+ X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22),
+ X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20),
+ X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G, 0x17),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17),
+ X86_MATCH_INTEL_FAM6_MODEL( BROADWELL, 0x25),
+ X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G, 0x17),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2),
+ X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L, 0xb2),
+ X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE, 0xb2),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52),
+ X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L, 0x52),
+ X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE, 0x52),
{},
};
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 2c5676b0a6e7..67768e54438b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -557,6 +557,12 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irqd->hwirq = virq + i;
irqd_set_single_target(irqd);
/*
+ * Prevent that any of these interrupts is invoked in
+ * non interrupt context via e.g. generic_handle_irq()
+ * as that can corrupt the affinity move state.
+ */
+ irqd_set_handle_enforce_irqctx(irqd);
+ /*
* Legacy vectors are already assigned when the IOAPIC
* takes them over. They stay on the same vector. This is
* required for check_timer() to work correctly as it might
@@ -838,13 +844,15 @@ static void free_moved_vector(struct apic_chip_data *apicd)
bool managed = apicd->is_managed;
/*
- * This should never happen. Managed interrupts are not
- * migrated except on CPU down, which does not involve the
- * cleanup vector. But try to keep the accounting correct
- * nevertheless.
+ * Managed interrupts are usually not migrated away
+ * from an online CPU, but CPU isolation 'managed_irq'
+ * can make that happen.
+ * 1) Activation does not take the isolation into account
+ * to keep the code simple
+ * 2) Migration away from an isolated CPU can happen when
+ * a non-isolated CPU which is in the calculated
+ * affinity mask comes online.
*/
- WARN_ON_ONCE(managed);
-
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
irq_matrix_free(vector_matrix, cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 5c7ee3df4d0b..3ca07ad552ae 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -88,7 +88,6 @@ static void __used common(void)
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_init_size, boot_params, hdr.init_size);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
- OFFSET(BP_code32_start, boot_params, hdr.code32_start);
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 82826f2275cc..6e043f295a60 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -3,12 +3,9 @@
# error "Please do not build this file directly, build asm-offsets.c instead"
#endif
-#include <asm/ucontext.h>
+#include <linux/efi.h>
-#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
-static char syscalls[] = {
-#include <asm/syscalls_32.h>
-};
+#include <asm/ucontext.h>
/* workaround for a warning with -Wmissing-prototypes */
void foo(void);
@@ -62,6 +59,5 @@ void foo(void)
#endif
BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
- DEFINE(NR_syscalls, sizeof(syscalls));
+ DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map));
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 24d2fde30d00..c2a47016f243 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -5,30 +5,6 @@
#include <asm/ia32.h>
-#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
-#define __SYSCALL_X32(nr, sym, qual)
-static char syscalls_64[] = {
-#include <asm/syscalls_64.h>
-};
-#undef __SYSCALL_64
-#undef __SYSCALL_X32
-
-#ifdef CONFIG_X86_X32_ABI
-#define __SYSCALL_64(nr, sym, qual)
-#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
-static char syscalls_x32[] = {
-#include <asm/syscalls_64.h>
-};
-#undef __SYSCALL_64
-#undef __SYSCALL_X32
-#endif
-
-#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
-static char syscalls_ia32[] = {
-#include <asm/syscalls_32.h>
-};
-#undef __SYSCALL_I386
-
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#include <asm/kvm_para.h>
#endif
@@ -90,17 +66,5 @@ int main(void)
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
BLANK();
#endif
-
- DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
- DEFINE(NR_syscalls, sizeof(syscalls_64));
-
-#ifdef CONFIG_X86_X32_ABI
- DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
- DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
-#endif
-
- DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
- DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
-
return 0;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ac83a0fef628..547ad7bbf0e0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -28,6 +28,7 @@
static const int amd_erratum_383[];
static const int amd_erratum_400[];
+static const int amd_erratum_1054[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
/*
@@ -393,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
}
+static void amd_detect_ppin(struct cpuinfo_x86 *c)
+{
+ unsigned long long val;
+
+ if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
+ return;
+
+ /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
+ if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
+ goto clear_ppin;
+
+ /* PPIN is locked in disabled mode, clear feature bit */
+ if ((val & 3UL) == 1UL)
+ goto clear_ppin;
+
+ /* If PPIN is disabled, try to enable it */
+ if (!(val & 2UL)) {
+ wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
+ rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
+ }
+
+ /* If PPIN_EN bit is 1, return from here; otherwise fall through */
+ if (val & 2UL)
+ return;
+
+clear_ppin:
+ clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
+}
+
u16 amd_get_nb_id(int cpu)
{
return per_cpu(cpu_llc_id, cpu);
@@ -925,7 +955,8 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x16: init_amd_jg(c); break;
- case 0x17: init_amd_zn(c); break;
+ case 0x17: fallthrough;
+ case 0x19: init_amd_zn(c); break;
}
/*
@@ -940,6 +971,7 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
+ amd_detect_ppin(c);
init_amd_cacheinfo(c);
@@ -972,6 +1004,15 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+ /*
+ * Turn on the Instructions Retired free counter on machines not
+ * susceptible to erratum #1054 "Instructions Retired Performance
+ * Counter May Be Inaccurate".
+ */
+ if (cpu_has(c, X86_FEATURE_IRPERF) &&
+ !cpu_has_amd_erratum(c, amd_erratum_1054))
+ msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
}
#ifdef CONFIG_X86_32
@@ -1099,6 +1140,10 @@ static const int amd_erratum_400[] =
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52c9bfbbdb2a..bed0cb83fe24 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
- get_cpu_cap(c);
+ set_cpu_cap(c, X86_FEATURE_OSPKE);
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -1008,8 +1008,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_ITLB_MULTIHIT BIT(7)
#define NO_SPECTRE_V2 BIT(8)
-#define VULNWL(_vendor, _family, _model, _whitelist) \
- { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+#define VULNWL(vendor, family, model, whitelist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
#define VULNWL_INTEL(model, whitelist) \
VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
@@ -1224,6 +1224,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_set_bug_bits(c);
+ cpu_set_core_cap_bits(c);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 0268185bef94..29a3bedabd06 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -5,6 +5,7 @@
#include <asm/msr-index.h>
#include <asm/processor.h>
#include <asm/vmx.h>
+#include "cpu.h"
#undef pr_fmt
#define pr_fmt(fmt) "x86/cpu: " fmt
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index be82cd5841c3..9a26e972cdea 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -19,6 +19,8 @@
#include <asm/microcode_intel.h>
#include <asm/hwcap2.h>
#include <asm/elf.h>
+#include <asm/cpu_device_id.h>
+#include <asm/cmdline.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -31,6 +33,20 @@
#include <asm/apic.h>
#endif
+enum split_lock_detect_state {
+ sld_off = 0,
+ sld_warn,
+ sld_fatal,
+};
+
+/*
+ * Default to sld_off because most systems do not support split lock detection
+ * split_lock_setup() will switch this to sld_warn on systems that support
+ * split lock detect, unless there is a command line override.
+ */
+static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
+static u64 msr_test_ctrl_cache __ro_after_init;
+
/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
@@ -570,6 +586,8 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
+static void split_lock_init(void);
+
static void init_intel(struct cpuinfo_x86 *c)
{
early_init_intel(c);
@@ -684,6 +702,8 @@ static void init_intel(struct cpuinfo_x86 *c)
tsx_enable();
if (tsx_ctrl_state == TSX_CTRL_DISABLE)
tsx_disable();
+
+ split_lock_init();
}
#ifdef CONFIG_X86_32
@@ -945,3 +965,166 @@ static const struct cpu_dev intel_cpu_dev = {
};
cpu_dev_register(intel_cpu_dev);
+
+#undef pr_fmt
+#define pr_fmt(fmt) "x86/split lock detection: " fmt
+
+static const struct {
+ const char *option;
+ enum split_lock_detect_state state;
+} sld_options[] __initconst = {
+ { "off", sld_off },
+ { "warn", sld_warn },
+ { "fatal", sld_fatal },
+};
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static bool split_lock_verify_msr(bool on)
+{
+ u64 ctrl, tmp;
+
+ if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
+ return false;
+ if (on)
+ ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+ else
+ ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+ if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
+ return false;
+ rdmsrl(MSR_TEST_CTRL, tmp);
+ return ctrl == tmp;
+}
+
+static void __init split_lock_setup(void)
+{
+ enum split_lock_detect_state state = sld_warn;
+ char arg[20];
+ int i, ret;
+
+ if (!split_lock_verify_msr(false)) {
+ pr_info("MSR access failed: Disabled\n");
+ return;
+ }
+
+ ret = cmdline_find_option(boot_command_line, "split_lock_detect",
+ arg, sizeof(arg));
+ if (ret >= 0) {
+ for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
+ if (match_option(arg, ret, sld_options[i].option)) {
+ state = sld_options[i].state;
+ break;
+ }
+ }
+ }
+
+ switch (state) {
+ case sld_off:
+ pr_info("disabled\n");
+ return;
+ case sld_warn:
+ pr_info("warning about user-space split_locks\n");
+ break;
+ case sld_fatal:
+ pr_info("sending SIGBUS on user-space split_locks\n");
+ break;
+ }
+
+ rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
+ if (!split_lock_verify_msr(true)) {
+ pr_info("MSR access failed: Disabled\n");
+ return;
+ }
+
+ sld_state = state;
+ setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
+}
+
+/*
+ * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
+ * is not implemented as one thread could undo the setting of the other
+ * thread immediately after dropping the lock anyway.
+ */
+static void sld_update_msr(bool on)
+{
+ u64 test_ctrl_val = msr_test_ctrl_cache;
+
+ if (on)
+ test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+
+ wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
+}
+
+static void split_lock_init(void)
+{
+ split_lock_verify_msr(sld_state != sld_off);
+}
+
+bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+{
+ if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
+ return false;
+
+ pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, regs->ip);
+
+ /*
+ * Disable the split lock detection for this task so it can make
+ * progress and set TIF_SLD so the detection is re-enabled via
+ * switch_to_sld() when the task is scheduled out.
+ */
+ sld_update_msr(false);
+ set_tsk_thread_flag(current, TIF_SLD);
+ return true;
+}
+
+/*
+ * This function is called only when switching between tasks with
+ * different split-lock detection modes. It sets the MSR for the
+ * mode of the new task. This is right most of the time, but since
+ * the MSR is shared by hyperthreads on a physical core there can
+ * be glitches when the two threads need different modes.
+ */
+void switch_to_sld(unsigned long tifn)
+{
+ sld_update_msr(!(tifn & _TIF_SLD));
+}
+
+#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
+
+/*
+ * The following processors have the split lock detection feature. But
+ * since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
+ * be enumerated. Enable it by family and model matching on these
+ * processors.
+ */
+static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
+ SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
+ SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
+ {}
+};
+
+void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
+{
+ u64 ia32_core_caps = 0;
+
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return;
+ if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
+ /* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */
+ rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
+ } else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ /* Enumerate split lock detection by family and model. */
+ if (x86_match_cpu(split_lock_cpu_ids))
+ ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
+ }
+
+ if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
+ split_lock_setup();
+}
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 6dd78d8235e4..d3482eb43ff3 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -16,12 +16,17 @@
* respective wildcard entries.
*
* A typical table entry would be to match a specific CPU
- * { X86_VENDOR_INTEL, 6, 0x12 }
- * or to match a specific CPU feature
- * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL,
+ * X86_FEATURE_ANY, NULL);
*
* Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
- * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
+ * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor)
+ *
+ * asm/cpu_device_id.h contains a set of useful macros which are shortcuts
+ * for various common selections. The above can be shortened to:
+ *
+ * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL);
*
* Arrays used to match for this should also be declared using
* MODULE_DEVICE_TABLE(x86cpu, ...)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index b3a50d962851..52de616a8065 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1163,9 +1163,12 @@ static const struct sysfs_ops threshold_ops = {
.store = store,
};
+static void threshold_block_release(struct kobject *kobj);
+
static struct kobj_type threshold_ktype = {
.sysfs_ops = &threshold_ops,
.default_attrs = default_attrs,
+ .release = threshold_block_release,
};
static const char *get_name(unsigned int bank, struct threshold_block *b)
@@ -1198,8 +1201,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return buf_mcatype;
}
-static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
- unsigned int block, u32 address)
+static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
+ unsigned int bank, unsigned int block,
+ u32 address)
{
struct threshold_block *b = NULL;
u32 low, high;
@@ -1243,16 +1247,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
INIT_LIST_HEAD(&b->miscj);
- if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
- list_add(&b->miscj,
- &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
- } else {
- per_cpu(threshold_banks, cpu)[bank]->blocks = b;
- }
+ if (tb->blocks)
+ list_add(&b->miscj, &tb->blocks->miscj);
+ else
+ tb->blocks = b;
- err = kobject_init_and_add(&b->kobj, &threshold_ktype,
- per_cpu(threshold_banks, cpu)[bank]->kobj,
- get_name(bank, b));
+ err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
if (err)
goto out_free;
recurse:
@@ -1260,7 +1260,7 @@ recurse:
if (!address)
return 0;
- err = allocate_threshold_blocks(cpu, bank, block, address);
+ err = allocate_threshold_blocks(cpu, tb, bank, block, address);
if (err)
goto out_free;
@@ -1345,8 +1345,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out_free;
}
- per_cpu(threshold_banks, cpu)[bank] = b;
-
if (is_shared_bank(bank)) {
refcount_set(&b->cpus, 1);
@@ -1357,9 +1355,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
}
}
- err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
- if (!err)
- goto out;
+ err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
+ if (err)
+ goto out_free;
+
+ per_cpu(threshold_banks, cpu)[bank] = b;
+
+ return 0;
out_free:
kfree(b);
@@ -1368,8 +1370,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
return err;
}
-static void deallocate_threshold_block(unsigned int cpu,
- unsigned int bank)
+static void threshold_block_release(struct kobject *kobj)
+{
+ kfree(to_block(kobj));
+}
+
+static void deallocate_threshold_block(unsigned int cpu, unsigned int bank)
{
struct threshold_block *pos = NULL;
struct threshold_block *tmp = NULL;
@@ -1379,13 +1385,11 @@ static void deallocate_threshold_block(unsigned int cpu,
return;
list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
- kobject_put(&pos->kobj);
list_del(&pos->miscj);
- kfree(pos);
+ kobject_put(&pos->kobj);
}
- kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
- per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
+ kobject_put(&head->blocks->kobj);
}
static void __threshold_remove_blocks(struct threshold_bank *b)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c4f949611e4..54165f3569e8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -142,6 +142,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+ else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
+ rdmsrl(MSR_AMD_PPIN, m->ppin);
m->microcode = boot_cpu_data.microcode;
}
@@ -1213,8 +1215,14 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
* On Intel systems this is entered on all CPUs in parallel through
* MCE broadcast. However some CPUs might be broken beyond repair,
* so be always careful when synchronizing with others.
+ *
+ * Tracing and kprobes are disabled: if we interrupted a kernel context
+ * with IF=1, we need to minimize stack usage. There are also recursion
+ * issues: if the machine check was due to a failure of the memory
+ * backing the user stack, tracing that reads the user stack will cause
+ * potentially infinite recursion.
*/
-void do_machine_check(struct pt_regs *regs, long error_code)
+void notrace do_machine_check(struct pt_regs *regs, long error_code)
{
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
@@ -1360,6 +1368,7 @@ out_ist:
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
+NOKPROBE_SYMBOL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int flags)
@@ -1877,6 +1886,8 @@ bool filter_mce(struct mce *m)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return amd_filter_mce(m);
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return intel_filter_mce(m);
return false;
}
@@ -1892,10 +1903,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
-dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+dotraplinkage notrace void do_mce(struct pt_regs *regs, long error_code)
{
machine_check_vector(regs, error_code);
}
+NOKPROBE_SYMBOL(do_mce);
/*
* Called for each booted CPU to set up machine checks.
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 7c8958dee103..d089567a9ce8 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
* separate MCEs from kernel messages to avoid bogus bug reports.
*/
-static struct mce_log_buffer mcelog = {
- .signature = MCE_LOG_SIGNATURE,
- .len = MCE_LOG_LEN,
- .recordlen = sizeof(struct mce),
-};
+static struct mce_log_buffer *mcelog;
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
mutex_lock(&mce_chrdev_read_mutex);
- entry = mcelog.next;
+ entry = mcelog->next;
/*
* When the buffer fills up discard new entries. Assume that the
* earlier errors are the more interesting ones:
*/
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+ if (entry >= mcelog->len) {
+ set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
goto unlock;
}
- mcelog.next = entry + 1;
+ mcelog->next = entry + 1;
- memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- mcelog.entry[entry].finished = 1;
+ memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
+ mcelog->entry[entry].finished = 1;
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
/* Only supports full reads right now */
err = -EINVAL;
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+ if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
goto out;
- next = mcelog.next;
+ next = mcelog->next;
err = 0;
for (i = 0; i < next; i++) {
- struct mce *m = &mcelog.entry[i];
+ struct mce *m = &mcelog->entry[i];
err |= copy_to_user(buf, m, sizeof(*m));
buf += sizeof(*m);
}
- memset(mcelog.entry, 0, next * sizeof(struct mce));
- mcelog.next = 0;
+ memset(mcelog->entry, 0, next * sizeof(struct mce));
+ mcelog->next = 0;
if (err)
err = -EFAULT;
@@ -242,7 +238,7 @@ out:
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
- if (READ_ONCE(mcelog.next))
+ if (READ_ONCE(mcelog->next))
return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return EPOLLIN | EPOLLRDNORM;
@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
case MCE_GET_RECORD_LEN:
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
- return put_user(MCE_LOG_LEN, p);
+ return put_user(mcelog->len, p);
case MCE_GETCLEAR_FLAGS: {
unsigned flags;
do {
- flags = mcelog.flags;
- } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+ flags = mcelog->flags;
+ } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
return put_user(flags, p);
}
@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = {
static __init int dev_mcelog_init_device(void)
{
+ int mce_log_len;
int err;
+ mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
+ mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
+ if (!mcelog)
+ return -ENOMEM;
+
+ strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
+ mcelog->len = mce_log_len;
+ mcelog->recordlen = sizeof(struct mce);
+
/* register character device /dev/mcelog */
err = misc_register(&mce_chrdev_device);
if (err) {
@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void)
else
pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+ kfree(mcelog);
return err;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 5627b1091b85..d8f9230d2034 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -493,17 +493,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
return;
if ((val & 3UL) == 1UL) {
- /* PPIN available but disabled: */
+ /* PPIN locked in disabled mode */
return;
}
- /* If PPIN is disabled, but not locked, try to enable: */
- if (!(val & 3UL)) {
+ /* If PPIN is disabled, try to enable */
+ if (!(val & 2UL)) {
wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
rdmsrl_safe(MSR_PPIN_CTL, &val);
}
- if ((val & 3UL) == 2UL)
+ /* Is the enable bit set? */
+ if (val & 2UL)
set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
}
}
@@ -520,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
}
+
+bool intel_filter_mce(struct mce *m)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
+ if ((c->x86 == 6) &&
+ ((c->x86_model == INTEL_FAM6_HASWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_L) ||
+ (c->x86_model == INTEL_FAM6_BROADWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+ (m->bank == 0) &&
+ ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
+ return true;
+
+ return false;
+}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b785c0d0b590..3b008172ad73 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -8,6 +8,9 @@
#include <linux/device.h>
#include <asm/mce.h>
+/* Pointer to the installed machine check handler for this CPU setup. */
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+
enum severity_level {
MCE_NO_SEVERITY,
MCE_DEFERRED_SEVERITY,
@@ -48,6 +51,7 @@ void cmci_disable_bank(int bank);
void intel_init_cmci(void);
void intel_init_lmce(void);
void intel_clear_lmce(void);
+bool intel_filter_mce(struct mce *m);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -56,6 +60,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
+static inline bool intel_filter_mce(struct mce *m) { return false; };
#endif
void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 58b4ee3cda77..f36dc0742085 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -486,9 +486,14 @@ static int thermal_throttle_offline(unsigned int cpu)
{
struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
+ u32 l;
+
+ /* Mask the thermal vector before draining evtl. pending work */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
- cancel_delayed_work(&state->package_throttle.therm_work);
- cancel_delayed_work(&state->core_throttle.therm_work);
+ cancel_delayed_work_sync(&state->package_throttle.therm_work);
+ cancel_delayed_work_sync(&state->core_throttle.therm_work);
state->package_throttle.rate_control_active = false;
state->core_throttle.rate_control_active = false;
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index c222f283b456..300e3fd5ade3 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -4,6 +4,7 @@
#include <linux/cpu.h>
#include <asm/msr.h>
+#include <asm/mwait.h>
#define UMWAIT_C02_ENABLE 0
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 46d732696c1c..9b6fafa69be9 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -25,6 +25,8 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
+#include <linux/cpu.h>
+#include <linux/reboot.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
@@ -47,6 +49,11 @@
#define VMWARE_CMD_GETVCPU_INFO 68
#define VMWARE_CMD_LEGACY_X2APIC 3
#define VMWARE_CMD_VCPU_RESERVED 31
+#define VMWARE_CMD_STEALCLOCK 91
+
+#define STEALCLOCK_NOT_AVAILABLE (-1)
+#define STEALCLOCK_DISABLED 0
+#define STEALCLOCK_ENABLED 1
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
__asm__("inl (%%dx), %%eax" : \
@@ -86,6 +93,18 @@
} \
} while (0)
+struct vmware_steal_time {
+ union {
+ uint64_t clock; /* stolen time counter in units of vtsc */
+ struct {
+ /* only for little-endian */
+ uint32_t clock_low;
+ uint32_t clock_high;
+ };
+ };
+ uint64_t reserved[7];
+};
+
static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode __ro_after_init;
@@ -103,15 +122,25 @@ static unsigned long vmware_get_tsc_khz(void)
#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
-static int vmw_sched_clock __initdata = 1;
+static bool vmw_sched_clock __initdata = true;
+static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
+static bool has_steal_clock;
+static bool steal_acc __initdata = true; /* steal time accounting */
static __init int setup_vmw_sched_clock(char *s)
{
- vmw_sched_clock = 0;
+ vmw_sched_clock = false;
return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
+static __init int parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+early_param("no-steal-acc", parse_no_stealacc);
+
static unsigned long long notrace vmware_sched_clock(void)
{
unsigned long long ns;
@@ -122,7 +151,7 @@ static unsigned long long notrace vmware_sched_clock(void)
return ns;
}
-static void __init vmware_sched_clock_setup(void)
+static void __init vmware_cyc2ns_setup(void)
{
struct cyc2ns_data *d = &vmware_cyc2ns;
unsigned long long tsc_now = rdtsc();
@@ -132,17 +161,201 @@ static void __init vmware_sched_clock_setup(void)
d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
d->cyc2ns_shift);
- pv_ops.time.sched_clock = vmware_sched_clock;
- pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset);
+ pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
+}
+
+static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
+{
+ uint32_t result, info;
+
+ asm volatile (VMWARE_HYPERCALL :
+ "=a"(result),
+ "=c"(info) :
+ "a"(VMWARE_HYPERVISOR_MAGIC),
+ "b"(0),
+ "c"(VMWARE_CMD_STEALCLOCK),
+ "d"(0),
+ "S"(arg1),
+ "D"(arg2) :
+ "memory");
+ return result;
+}
+
+static bool stealclock_enable(phys_addr_t pa)
+{
+ return vmware_cmd_stealclock(upper_32_bits(pa),
+ lower_32_bits(pa)) == STEALCLOCK_ENABLED;
+}
+
+static int __stealclock_disable(void)
+{
+ return vmware_cmd_stealclock(0, 1);
+}
+
+static void stealclock_disable(void)
+{
+ __stealclock_disable();
+}
+
+static bool vmware_is_stealclock_available(void)
+{
+ return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
+}
+
+/**
+ * vmware_steal_clock() - read the per-cpu steal clock
+ * @cpu: the cpu number whose steal clock we want to read
+ *
+ * The function reads the steal clock if we are on a 64-bit system, otherwise
+ * reads it in parts, checking that the high part didn't change in the
+ * meantime.
+ *
+ * Return:
+ * The steal clock reading in ns.
+ */
+static uint64_t vmware_steal_clock(int cpu)
+{
+ struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
+ uint64_t clock;
+
+ if (IS_ENABLED(CONFIG_64BIT))
+ clock = READ_ONCE(steal->clock);
+ else {
+ uint32_t initial_high, low, high;
+
+ do {
+ initial_high = READ_ONCE(steal->clock_high);
+ /* Do not reorder initial_high and high readings */
+ virt_rmb();
+ low = READ_ONCE(steal->clock_low);
+ /* Keep low reading in between */
+ virt_rmb();
+ high = READ_ONCE(steal->clock_high);
+ } while (initial_high != high);
+
+ clock = ((uint64_t)high << 32) | low;
+ }
+
+ return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
+ vmware_cyc2ns.cyc2ns_shift);
+}
+
+static void vmware_register_steal_time(void)
+{
+ int cpu = smp_processor_id();
+ struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);
+
+ if (!has_steal_clock)
+ return;
+
+ if (!stealclock_enable(slow_virt_to_phys(st))) {
+ has_steal_clock = false;
+ return;
+ }
+
+ pr_info("vmware-stealtime: cpu %d, pa %llx\n",
+ cpu, (unsigned long long) slow_virt_to_phys(st));
}
+static void vmware_disable_steal_time(void)
+{
+ if (!has_steal_clock)
+ return;
+
+ stealclock_disable();
+}
+
+static void vmware_guest_cpu_init(void)
+{
+ if (has_steal_clock)
+ vmware_register_steal_time();
+}
+
+static void vmware_pv_guest_cpu_reboot(void *unused)
+{
+ vmware_disable_steal_time();
+}
+
+static int vmware_pv_reboot_notify(struct notifier_block *nb,
+ unsigned long code, void *unused)
+{
+ if (code == SYS_RESTART)
+ on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block vmware_pv_reboot_nb = {
+ .notifier_call = vmware_pv_reboot_notify,
+};
+
+#ifdef CONFIG_SMP
+static void __init vmware_smp_prepare_boot_cpu(void)
+{
+ vmware_guest_cpu_init();
+ native_smp_prepare_boot_cpu();
+}
+
+static int vmware_cpu_online(unsigned int cpu)
+{
+ local_irq_disable();
+ vmware_guest_cpu_init();
+ local_irq_enable();
+ return 0;
+}
+
+static int vmware_cpu_down_prepare(unsigned int cpu)
+{
+ local_irq_disable();
+ vmware_disable_steal_time();
+ local_irq_enable();
+ return 0;
+}
+#endif
+
+static __init int activate_jump_labels(void)
+{
+ if (has_steal_clock) {
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+ }
+
+ return 0;
+}
+arch_initcall(activate_jump_labels);
+
static void __init vmware_paravirt_ops_setup(void)
{
pv_info.name = "VMware hypervisor";
pv_ops.cpu.io_delay = paravirt_nop;
- if (vmware_tsc_khz && vmw_sched_clock)
- vmware_sched_clock_setup();
+ if (vmware_tsc_khz == 0)
+ return;
+
+ vmware_cyc2ns_setup();
+
+ if (vmw_sched_clock)
+ pv_ops.time.sched_clock = vmware_sched_clock;
+
+ if (vmware_is_stealclock_available()) {
+ has_steal_clock = true;
+ pv_ops.time.steal_clock = vmware_steal_clock;
+
+ /* We use reboot notifier only to disable steal clock */
+ register_reboot_notifier(&vmware_pv_reboot_nb);
+
+#ifdef CONFIG_SMP
+ smp_ops.smp_prepare_boot_cpu =
+ vmware_smp_prepare_boot_cpu;
+ if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "x86/vmware:online",
+ vmware_cpu_online,
+ vmware_cpu_down_prepare) < 0)
+ pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
+#else
+ vmware_guest_cpu_init();
+#endif
+ }
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
@@ -213,7 +426,7 @@ static void __init vmware_platform_setup(void)
vmware_set_capabilities();
}
-static u8 vmware_select_hypercall(void)
+static u8 __init vmware_select_hypercall(void)
{
int eax, ebx, ecx, edx;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index a1806598aaa4..32b153d38748 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -120,11 +120,6 @@ static bool xfeature_is_supervisor(int xfeature_nr)
return ecx & 1;
}
-static bool xfeature_is_user(int xfeature_nr)
-{
- return !xfeature_is_supervisor(xfeature_nr);
-}
-
/*
* When executing XSAVEOPT (or other optimized XSAVE instructions), if
* a processor implementation detects that an FPU state component is still
@@ -265,21 +260,25 @@ static void __init setup_xstate_features(void)
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ xstate_sizes[i] = eax;
+
/*
- * If an xfeature is supervisor state, the offset
- * in EBX is invalid. We leave it to -1.
+ * If an xfeature is supervisor state, the offset in EBX is
+ * invalid, leave it to -1.
*/
- if (xfeature_is_user(i))
- xstate_offsets[i] = ebx;
+ if (xfeature_is_supervisor(i))
+ continue;
+
+ xstate_offsets[i] = ebx;
- xstate_sizes[i] = eax;
/*
- * In our xstate size checks, we assume that the
- * highest-numbered xstate feature has the
- * highest offset in the buffer. Ensure it does.
+ * In our xstate size checks, we assume that the highest-numbered
+ * xstate feature has the highest offset in the buffer. Ensure
+ * it does.
*/
WARN_ONCE(last_good_offset > xstate_offsets[i],
- "x86/fpu: misordered xstate at %d\n", last_good_offset);
+ "x86/fpu: misordered xstate at %d\n", last_good_offset);
+
last_good_offset = xstate_offsets[i];
}
}
@@ -326,6 +325,13 @@ static int xfeature_is_aligned(int xfeature_nr)
u32 eax, ebx, ecx, edx;
CHECK_XFEATURE(xfeature_nr);
+
+ if (!xfeature_enabled(xfeature_nr)) {
+ WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
+ xfeature_nr);
+ return 0;
+ }
+
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
/*
* The value returned by ECX[1] indicates the alignment
@@ -338,11 +344,11 @@ static int xfeature_is_aligned(int xfeature_nr)
/*
* This function sets up offsets and sizes of all extended states in
* xsave area. This supports both standard format and compacted format
- * of the xsave aread.
+ * of the xsave area.
*/
-static void __init setup_xstate_comp(void)
+static void __init setup_xstate_comp_offsets(void)
{
- unsigned int xstate_comp_sizes[XFEATURE_MAX];
+ unsigned int next_offset;
int i;
/*
@@ -356,31 +362,23 @@ static void __init setup_xstate_comp(void)
if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i)) {
+ if (xfeature_enabled(i))
xstate_comp_offsets[i] = xstate_offsets[i];
- xstate_comp_sizes[i] = xstate_sizes[i];
- }
}
return;
}
- xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
- FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i))
- xstate_comp_sizes[i] = xstate_sizes[i];
- else
- xstate_comp_sizes[i] = 0;
+ if (!xfeature_enabled(i))
+ continue;
- if (i > FIRST_EXTENDED_XFEATURE) {
- xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
- + xstate_comp_sizes[i-1];
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
- if (xfeature_is_aligned(i))
- xstate_comp_offsets[i] =
- ALIGN(xstate_comp_offsets[i], 64);
- }
+ xstate_comp_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
}
}
@@ -774,7 +772,7 @@ void __init fpu__init_system_xstate(void)
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
- setup_xstate_comp();
+ setup_xstate_comp_offsets();
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
@@ -897,8 +895,6 @@ const void *get_xsave_field_ptr(int xfeature_nr)
#ifdef CONFIG_ARCH_HAS_PKEYS
-#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
-#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
* This will go out and modify PKRU register to set the access
* rights for @pkey to @init_val.
@@ -917,6 +913,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
+ /*
+ * This code should only be called with valid 'pkey'
+ * values originating from in-kernel users. Complain
+ * if a bad value is observed.
+ */
+ WARN_ON_ONCE(pkey >= arch_max_pkey());
+
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
new_pkru_bits |= PKRU_AD_BIT;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 3923ab4630d7..f66a6b90f954 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -67,11 +67,6 @@ __HEAD
SYM_CODE_START(startup_32)
movl pa(initial_stack),%ecx
- /* test KEEP_SEGMENTS flag to see if the bootloader is asking
- us to not reload segments */
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
- jnz 2f
-
/*
* Set segments to known values.
*/
@@ -82,7 +77,6 @@ SYM_CODE_START(startup_32)
movl %eax,%fs
movl %eax,%gs
movl %eax,%ss
-2:
leal -__PAGE_OFFSET(%ecx),%esp
/*
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 4d4f5d9faac3..7dfb1e808928 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -10,8 +10,6 @@ extern struct boot_params boot_params;
static enum efi_secureboot_mode get_sb_mode(void)
{
- efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
- efi_char16_t efi_SetupMode_name[] = L"SecureBoot";
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
efi_status_t status;
unsigned long size;
@@ -19,13 +17,13 @@ static enum efi_secureboot_mode get_sb_mode(void)
size = sizeof(secboot);
- if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) {
pr_info("ima: secureboot mode unknown, no efi\n");
return efi_secureboot_mode_unknown;
}
/* Get variable contents into buffer */
- status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid,
+ status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
NULL, &size, &secboot);
if (status == EFI_NOT_FOUND) {
pr_info("ima: secureboot mode disabled\n");
@@ -38,7 +36,7 @@ static enum efi_secureboot_mode get_sb_mode(void)
}
size = sizeof(setupmode);
- status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
+ status = efi.get_variable(L"SetupMode", &efi_variable_guid,
NULL, &size, &setupmode);
if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8abeee0dd7bf..a53e7b4a7419 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -13,6 +13,7 @@
#include <asm/io_bitmap.h>
#include <asm/desc.h>
+#include <asm/syscalls.h>
#ifdef CONFIG_X86_IOPL_IOPERM
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 21efee32e2b1..c7965ff429c5 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -230,7 +230,7 @@ u64 arch_irq_stat(void)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
+__visible void __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc * desc;
@@ -263,7 +263,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
exiting_irq();
set_irq_regs(old_regs);
- return 1;
}
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 16919a9671fa..5aa523c2d573 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -44,15 +44,6 @@
* (these are usually mapped into the 0x30-0xff vector range)
*/
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
- .handler = no_action,
- .name = "cascade",
- .flags = IRQF_NO_THREAD,
-};
-
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
@@ -84,7 +75,7 @@ void __init init_IRQ(void)
* On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
- * these IRQ's are handled by more mordern controllers like IO-APIC,
+ * these IRQs are handled by more modern controllers like IO-APIC,
* then this vector space can be freed and re-used dynamically as the
* irq's migrate etc.
*/
@@ -104,6 +95,9 @@ void __init native_init_IRQ(void)
idt_setup_apic_and_irq_gates();
lapic_assign_system_vectors();
- if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
- setup_irq(2, &irq2);
+ if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
+ /* IRQ2 is cascade interrupt to second interrupt controller */
+ if (request_irq(2, no_action, IRQF_NO_THREAD, "cascade", NULL))
+ pr_err("%s: request_irq() failed\n", "cascade");
+ }
}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 9c4498ea0b3c..5ba8477c2cb7 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -58,7 +58,7 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type,
return code;
}
-static void inline __jump_label_transform(struct jump_entry *entry,
+static inline void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index f293d872602a..db6578d45157 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -141,9 +141,8 @@ prepare_add_efi_setup_data(struct boot_params *params,
struct setup_data *sd = (void *)params + efi_setup_data_offset;
struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
- esd->fw_vendor = efi.fw_vendor;
- esd->runtime = efi.runtime;
- esd->tables = efi.config_table;
+ esd->fw_vendor = efi_fw_vendor;
+ esd->tables = efi_config_table;
esd->smbios = efi.smbios;
sd->type = SETUP_EFI;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 3f45b5c43a71..ea13f6888284 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -71,6 +71,21 @@ found:
return (unsigned long)buf;
}
+static void synthesize_clac(kprobe_opcode_t *addr)
+{
+ /*
+ * Can't be static_cpu_has() due to how objtool treats this feature bit.
+ * This isn't a fast path anyway.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SMAP))
+ return;
+
+ /* Replace the NOP3 with CLAC */
+ addr[0] = 0x0f;
+ addr[1] = 0x01;
+ addr[2] = 0xca;
+}
+
/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
@@ -92,6 +107,9 @@ asm (
/* We don't bother saving the ss register */
" pushq %rsp\n"
" pushfq\n"
+ ".global optprobe_template_clac\n"
+ "optprobe_template_clac:\n"
+ ASM_NOP3
SAVE_REGS_STRING
" movq %rsp, %rsi\n"
".global optprobe_template_val\n"
@@ -111,6 +129,9 @@ asm (
#else /* CONFIG_X86_32 */
" pushl %esp\n"
" pushfl\n"
+ ".global optprobe_template_clac\n"
+ "optprobe_template_clac:\n"
+ ASM_NOP3
SAVE_REGS_STRING
" movl %esp, %edx\n"
".global optprobe_template_val\n"
@@ -134,6 +155,8 @@ asm (
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
+#define TMPL_CLAC_IDX \
+ ((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
@@ -389,6 +412,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
op->optinsn.size = ret;
len = TMPL_END_IDX + op->optinsn.size;
+ synthesize_clac(buf + TMPL_CLAC_IDX);
+
/* Set probe information */
synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d817f255aed8..6efe0410fb72 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -425,7 +425,29 @@ static void __init sev_map_percpu_data(void)
}
}
+static bool pv_tlb_flush_supported(void)
+{
+ return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
+static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
#ifdef CONFIG_SMP
+
+static bool pv_ipi_supported(void)
+{
+ return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+}
+
+static bool pv_sched_yield_supported(void)
+{
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
static void __send_ipi_mask(const struct cpumask *mask, int vector)
@@ -490,12 +512,12 @@ static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned int this_cpu = smp_processor_id();
- struct cpumask new_mask;
+ struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
const struct cpumask *local_mask;
- cpumask_copy(&new_mask, mask);
- cpumask_clear_cpu(this_cpu, &new_mask);
- local_mask = &new_mask;
+ cpumask_copy(new_mask, mask);
+ cpumask_clear_cpu(this_cpu, new_mask);
+ local_mask = new_mask;
__send_ipi_mask(local_mask, vector);
}
@@ -575,7 +597,6 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
-static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
@@ -583,7 +604,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
u8 state;
int cpu;
struct kvm_steal_time *src;
- struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+ struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
cpumask_copy(flushmask, cpumask);
/*
@@ -619,11 +640,10 @@ static void __init kvm_guest_init(void)
pv_ops.time.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_tlb_flush_supported()) {
pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
+ pr_info("KVM setup pv remote TLB flush\n");
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -632,9 +652,7 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_sched_yield_supported()) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
pr_info("KVM setup pv sched yield\n");
}
@@ -700,7 +718,7 @@ static uint32_t __init kvm_detect(void)
static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
- if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+ if (pv_ipi_supported())
kvm_setup_pv_ipi();
#endif
}
@@ -732,26 +750,31 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
-static __init int kvm_setup_pv_tlb_flush(void)
+static __init int kvm_alloc_cpumask(void)
{
int cpu;
+ bool alloc = false;
if (!kvm_para_available() || nopv)
return 0;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_tlb_flush_supported())
+ alloc = true;
+
+#if defined(CONFIG_SMP)
+ if (pv_ipi_supported())
+ alloc = true;
+#endif
+
+ if (alloc)
for_each_possible_cpu(cpu) {
- zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
}
- pr_info("KVM setup pv remote TLB flush\n");
- }
return 0;
}
-arch_initcall(kvm_setup_pv_tlb_flush);
+arch_initcall(kvm_alloc_cpumask);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 904494b924c1..34b18f6eeb2c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -159,12 +159,19 @@ bool kvm_check_and_clear_guest_paused(void)
return ret;
}
+static int kvm_cs_enable(struct clocksource *cs)
+{
+ vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
+ return 0;
+}
+
struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
@@ -272,7 +279,7 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
if (!(flags & PVCLOCK_TSC_STABLE_BIT))
return 0;
- kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
+ kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
#endif
kvmclock_init_mem();
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c57e1ca70fd1..84c3ba32f211 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -27,7 +27,6 @@
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
-#include <asm/syscalls.h>
#include <asm/pgtable_areas.h>
/* This is a multiple of PAGE_SIZE. */
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 54c21d6abd5a..6407ea21fa1b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -403,9 +403,9 @@ static void default_do_nmi(struct pt_regs *regs)
* a 'real' unknown NMI. For example, while processing
* a perf NMI another perf NMI comes in along with a
* 'real' unknown NMI. These two NMIs get combined into
- * one (as descibed above). When the next NMI gets
+ * one (as described above). When the next NMI gets
* processed, it will be flagged by perf as handled, but
- * noone will know that there was a 'real' unknown NMI sent
+ * no one will know that there was a 'real' unknown NMI sent
* also. As a result it gets swallowed. Or if the first
* perf NMI returns two events handled then the second
* NMI will get eaten by the logic below, again losing a
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 789f5e4f89de..c131ba4e70ef 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
+#include <asm/io_bitmap.h>
/*
* nop stub, which must not clobber anything *including the stack* to
@@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = {
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .cpu.update_io_bitmap = native_tss_update_io_bitmap,
+#endif
+
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 839b5244e3b7..9da70b279dad 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,7 +28,6 @@
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
-#include <asm/syscalls.h>
#include <linux/uaccess.h>
#include <asm/mwait.h>
#include <asm/fpu/internal.h>
@@ -374,7 +373,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
/**
* tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
*/
-void tss_update_io_bitmap(void)
+void native_tss_update_io_bitmap(void)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
struct thread_struct *t = &current->thread;
@@ -650,6 +649,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
/* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn);
}
+
+ if ((tifp ^ tifn) & _TIF_SLD)
+ switch_to_sld(tifn);
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5052ced43373..954b013cc585 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -49,7 +49,6 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
-#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ffd497804dbc..5ef9d8f25b0e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -48,7 +48,6 @@
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
-#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 10125358b9c4..11065dc03f5b 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -145,7 +145,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti)
{
- WARN_ON(vclock_was_used(VCLOCK_PVCLOCK));
+ WARN_ON(vclock_was_used(VDSO_CLOCKMODE_PVCLOCK));
pvti_cpu0_va = pvti;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 0cc7c0b106bb..3ca43be4f9cf 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -531,7 +531,7 @@ static void emergency_vmx_disable_all(void)
/*
* We need to disable VMX on all CPUs before rebooting, otherwise
- * we risk hanging up the machine, because the CPU ignore INIT
+ * we risk hanging up the machine, because the CPU ignores INIT
* signals when VMX is enabled.
*
* We can't take any locks and we may be on an inconsistent
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index ef3ba99068d3..a4d9a261425b 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -9,6 +9,8 @@
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
/*
* Must be relocatable PIC code callable as a C function
@@ -39,6 +41,7 @@
.align PAGE_SIZE
.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
+ UNWIND_HINT_EMPTY
/*
* %rdi indirection_page
* %rsi page_list
@@ -105,6 +108,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+ UNWIND_HINT_EMPTY
/* set return address to 0 if not preserving context */
pushq $0
/* store the start address on the stack */
@@ -192,14 +196,12 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
1:
popq %rdx
leaq PAGE_SIZE(%r10), %rsp
+ ANNOTATE_RETPOLINE_SAFE
call *%rdx
/* get the re-entry point of the peer system */
movq 0(%rsp), %rbp
- call 1f
-1:
- popq %r8
- subq $(1b - relocate_kernel), %r8
+ leaq relocate_kernel(%rip), %r8
movq CP_PA_SWAP_PAGE(%r8), %r10
movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
movq CP_PA_TABLE_PAGE(%r8), %rax
@@ -212,6 +214,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+ UNWIND_HINT_EMPTY
movq RSP(%r8), %rsp
movq CR4(%r8), %rax
movq %rax, %cr4
@@ -233,6 +236,7 @@ SYM_CODE_END(virtual_mapped)
/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ UNWIND_HINT_EMPTY
movq %rdi, %rcx /* Put the page_list in %rcx */
xorl %edi, %edi
xorl %esi, %esi
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a74262c71484..e6b545047f38 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -64,7 +64,6 @@ RESERVE_BRK(dmi_alloc, 65536);
* at link time, with RESERVE_BRK*() facility reserving additional
* chunks.
*/
-static __initdata
unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 8a29573851a3..83b74fb38c8f 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -42,29 +42,9 @@
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
-#include <asm/syscalls.h>
-
#include <asm/sigframe.h>
#include <asm/signal.h>
-#define COPY(x) do { \
- get_user_ex(regs->x, &sc->x); \
-} while (0)
-
-#define GET_SEG(seg) ({ \
- unsigned short tmp; \
- get_user_ex(tmp, &sc->seg); \
- tmp; \
-})
-
-#define COPY_SEG(seg) do { \
- regs->seg = GET_SEG(seg); \
-} while (0)
-
-#define COPY_SEG_CPL3(seg) do { \
- regs->seg = GET_SEG(seg) | 3; \
-} while (0)
-
#ifdef CONFIG_X86_64
/*
* If regs->ss will cause an IRET fault, change it. Otherwise leave it
@@ -92,53 +72,58 @@ static void force_valid_ss(struct pt_regs *regs)
ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
regs->ss = __USER_DS;
}
+# define CONTEXT_COPY_SIZE offsetof(struct sigcontext, reserved1)
+#else
+# define CONTEXT_COPY_SIZE sizeof(struct sigcontext)
#endif
static int restore_sigcontext(struct pt_regs *regs,
- struct sigcontext __user *sc,
+ struct sigcontext __user *usc,
unsigned long uc_flags)
{
- unsigned long buf_val;
- void __user *buf;
- unsigned int tmpflags;
- unsigned int err = 0;
+ struct sigcontext sc;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
- get_user_try {
+ if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE))
+ return -EFAULT;
#ifdef CONFIG_X86_32
- set_user_gs(regs, GET_SEG(gs));
- COPY_SEG(fs);
- COPY_SEG(es);
- COPY_SEG(ds);
+ set_user_gs(regs, sc.gs);
+ regs->fs = sc.fs;
+ regs->es = sc.es;
+ regs->ds = sc.ds;
#endif /* CONFIG_X86_32 */
- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip); COPY(ax);
+ regs->bx = sc.bx;
+ regs->cx = sc.cx;
+ regs->dx = sc.dx;
+ regs->si = sc.si;
+ regs->di = sc.di;
+ regs->bp = sc.bp;
+ regs->ax = sc.ax;
+ regs->sp = sc.sp;
+ regs->ip = sc.ip;
#ifdef CONFIG_X86_64
- COPY(r8);
- COPY(r9);
- COPY(r10);
- COPY(r11);
- COPY(r12);
- COPY(r13);
- COPY(r14);
- COPY(r15);
+ regs->r8 = sc.r8;
+ regs->r9 = sc.r9;
+ regs->r10 = sc.r10;
+ regs->r11 = sc.r11;
+ regs->r12 = sc.r12;
+ regs->r13 = sc.r13;
+ regs->r14 = sc.r14;
+ regs->r15 = sc.r15;
#endif /* CONFIG_X86_64 */
- COPY_SEG_CPL3(cs);
- COPY_SEG_CPL3(ss);
-
- get_user_ex(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- regs->orig_ax = -1; /* disable syscall checks */
+ /* Get CS/SS and force CPL3 */
+ regs->cs = sc.cs | 0x03;
+ regs->ss = sc.ss | 0x03;
- get_user_ex(buf_val, &sc->fpstate);
- buf = (void __user *)buf_val;
- } get_user_catch(err);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS);
+ /* disable syscall checks */
+ regs->orig_ax = -1;
#ifdef CONFIG_X86_64
/*
@@ -149,70 +134,78 @@ static int restore_sigcontext(struct pt_regs *regs,
force_valid_ss(regs);
#endif
- err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
-
- return err;
+ return fpu__restore_sig((void __user *)sc.fpstate,
+ IS_ENABLED(CONFIG_X86_32));
}
-int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+static __always_inline int
+__unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
- int err = 0;
-
- put_user_try {
-
#ifdef CONFIG_X86_32
- put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
- put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
- put_user_ex(regs->es, (unsigned int __user *)&sc->es);
- put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
+ unsafe_put_user(get_user_gs(regs),
+ (unsigned int __user *)&sc->gs, Efault);
+ unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
+ unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
+ unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
#endif /* CONFIG_X86_32 */
- put_user_ex(regs->di, &sc->di);
- put_user_ex(regs->si, &sc->si);
- put_user_ex(regs->bp, &sc->bp);
- put_user_ex(regs->sp, &sc->sp);
- put_user_ex(regs->bx, &sc->bx);
- put_user_ex(regs->dx, &sc->dx);
- put_user_ex(regs->cx, &sc->cx);
- put_user_ex(regs->ax, &sc->ax);
+ unsafe_put_user(regs->di, &sc->di, Efault);
+ unsafe_put_user(regs->si, &sc->si, Efault);
+ unsafe_put_user(regs->bp, &sc->bp, Efault);
+ unsafe_put_user(regs->sp, &sc->sp, Efault);
+ unsafe_put_user(regs->bx, &sc->bx, Efault);
+ unsafe_put_user(regs->dx, &sc->dx, Efault);
+ unsafe_put_user(regs->cx, &sc->cx, Efault);
+ unsafe_put_user(regs->ax, &sc->ax, Efault);
#ifdef CONFIG_X86_64
- put_user_ex(regs->r8, &sc->r8);
- put_user_ex(regs->r9, &sc->r9);
- put_user_ex(regs->r10, &sc->r10);
- put_user_ex(regs->r11, &sc->r11);
- put_user_ex(regs->r12, &sc->r12);
- put_user_ex(regs->r13, &sc->r13);
- put_user_ex(regs->r14, &sc->r14);
- put_user_ex(regs->r15, &sc->r15);
+ unsafe_put_user(regs->r8, &sc->r8, Efault);
+ unsafe_put_user(regs->r9, &sc->r9, Efault);
+ unsafe_put_user(regs->r10, &sc->r10, Efault);
+ unsafe_put_user(regs->r11, &sc->r11, Efault);
+ unsafe_put_user(regs->r12, &sc->r12, Efault);
+ unsafe_put_user(regs->r13, &sc->r13, Efault);
+ unsafe_put_user(regs->r14, &sc->r14, Efault);
+ unsafe_put_user(regs->r15, &sc->r15, Efault);
#endif /* CONFIG_X86_64 */
- put_user_ex(current->thread.trap_nr, &sc->trapno);
- put_user_ex(current->thread.error_code, &sc->err);
- put_user_ex(regs->ip, &sc->ip);
+ unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault);
+ unsafe_put_user(current->thread.error_code, &sc->err, Efault);
+ unsafe_put_user(regs->ip, &sc->ip, Efault);
#ifdef CONFIG_X86_32
- put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
- put_user_ex(regs->flags, &sc->flags);
- put_user_ex(regs->sp, &sc->sp_at_signal);
- put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
+ unsafe_put_user(regs->cs, (unsigned int __user *)&sc->cs, Efault);
+ unsafe_put_user(regs->flags, &sc->flags, Efault);
+ unsafe_put_user(regs->sp, &sc->sp_at_signal, Efault);
+ unsafe_put_user(regs->ss, (unsigned int __user *)&sc->ss, Efault);
#else /* !CONFIG_X86_32 */
- put_user_ex(regs->flags, &sc->flags);
- put_user_ex(regs->cs, &sc->cs);
- put_user_ex(0, &sc->gs);
- put_user_ex(0, &sc->fs);
- put_user_ex(regs->ss, &sc->ss);
+ unsafe_put_user(regs->flags, &sc->flags, Efault);
+ unsafe_put_user(regs->cs, &sc->cs, Efault);
+ unsafe_put_user(0, &sc->gs, Efault);
+ unsafe_put_user(0, &sc->fs, Efault);
+ unsafe_put_user(regs->ss, &sc->ss, Efault);
#endif /* CONFIG_X86_32 */
- put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate);
-
- /* non-iBCS2 extensions.. */
- put_user_ex(mask, &sc->oldmask);
- put_user_ex(current->thread.cr2, &sc->cr2);
- } put_user_catch(err);
+ unsafe_put_user(fpstate, (unsigned long __user *)&sc->fpstate, Efault);
- return err;
+ /* non-iBCS2 extensions.. */
+ unsafe_put_user(mask, &sc->oldmask, Efault);
+ unsafe_put_user(current->thread.cr2, &sc->cr2, Efault);
+ return 0;
+Efault:
+ return -EFAULT;
}
+#define unsafe_put_sigcontext(sc, fp, regs, set, label) \
+do { \
+ if (__unsafe_setup_sigcontext(sc, fp, regs, set->sig[0])) \
+ goto label; \
+} while(0);
+
+#define unsafe_put_sigmask(set, frame, label) \
+ unsafe_put_user(*(__u64 *)(set), \
+ (__u64 __user *)&(frame)->uc.uc_sigmask, \
+ label)
+
/*
* Set up a signal frame.
*/
@@ -312,26 +305,16 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
{
struct sigframe __user *frame;
void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
-
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
-
- if (!access_ok(frame, sizeof(*frame)))
- return -EFAULT;
+ void __user *fp = NULL;
- if (__put_user(sig, &frame->sig))
- return -EFAULT;
+ frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
- if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+ if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
- if (_NSIG_WORDS > 1) {
- if (__copy_to_user(&frame->extramask, &set->sig[1],
- sizeof(frame->extramask)))
- return -EFAULT;
- }
-
+ unsafe_put_user(sig, &frame->sig, Efault);
+ unsafe_put_sigcontext(&frame->sc, fp, regs, set, Efault);
+ unsafe_put_user(set->sig[1], &frame->extramask[0], Efault);
if (current->mm->context.vdso)
restorer = current->mm->context.vdso +
vdso_image_32.sym___kernel_sigreturn;
@@ -341,7 +324,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
restorer = ksig->ka.sa.sa_restorer;
/* Set up to return from userspace. */
- err |= __put_user(restorer, &frame->pretcode);
+ unsafe_put_user(restorer, &frame->pretcode, Efault);
/*
* This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
@@ -350,10 +333,8 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
- err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
-
- if (err)
- return -EFAULT;
+ unsafe_put_user(*((u64 *)&retcode), (u64 *)frame->retcode, Efault);
+ user_access_end();
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
@@ -368,6 +349,10 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
regs->cs = __USER_CS;
return 0;
+
+Efault:
+ user_access_end();
+ return -EFAULT;
}
static int __setup_rt_frame(int sig, struct ksignal *ksig,
@@ -375,50 +360,45 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
{
struct rt_sigframe __user *frame;
void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
+ void __user *fp = NULL;
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
- if (!access_ok(frame, sizeof(*frame)))
+ if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
- put_user_try {
- put_user_ex(sig, &frame->sig);
- put_user_ex(&frame->info, &frame->pinfo);
- put_user_ex(&frame->uc, &frame->puc);
+ unsafe_put_user(sig, &frame->sig, Efault);
+ unsafe_put_user(&frame->info, &frame->pinfo, Efault);
+ unsafe_put_user(&frame->uc, &frame->puc, Efault);
- /* Create the ucontext. */
- if (static_cpu_has(X86_FEATURE_XSAVE))
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
- put_user_ex(0, &frame->uc.uc_link);
- save_altstack_ex(&frame->uc.uc_stack, regs->sp);
+ /* Create the ucontext. */
+ if (static_cpu_has(X86_FEATURE_XSAVE))
+ unsafe_put_user(UC_FP_XSTATE, &frame->uc.uc_flags, Efault);
+ else
+ unsafe_put_user(0, &frame->uc.uc_flags, Efault);
+ unsafe_put_user(0, &frame->uc.uc_link, Efault);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
- /* Set up to return from userspace. */
- restorer = current->mm->context.vdso +
- vdso_image_32.sym___kernel_rt_sigreturn;
- if (ksig->ka.sa.sa_flags & SA_RESTORER)
- restorer = ksig->ka.sa.sa_restorer;
- put_user_ex(restorer, &frame->pretcode);
+ /* Set up to return from userspace. */
+ restorer = current->mm->context.vdso +
+ vdso_image_32.sym___kernel_rt_sigreturn;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ restorer = ksig->ka.sa.sa_restorer;
+ unsafe_put_user(restorer, &frame->pretcode, Efault);
- /*
- * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
- *
- * WE DO NOT USE IT ANY MORE! It's only left here for historical
- * reasons and because gdb uses it as a signature to notice
- * signal handler stack frames.
- */
- put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
- } put_user_catch(err);
+ /*
+ * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
+ *
+ * WE DO NOT USE IT ANY MORE! It's only left here for historical
+ * reasons and because gdb uses it as a signature to notice
+ * signal handler stack frames.
+ */
+ unsafe_put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode, Efault);
+ unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
+ unsafe_put_sigmask(set, frame, Efault);
+ user_access_end();
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- if (err)
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
/* Set up registers for signal handler */
@@ -434,6 +414,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->cs = __USER_CS;
return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
}
#else /* !CONFIG_X86_32 */
static unsigned long frame_uc_flags(struct pt_regs *regs)
@@ -457,43 +440,34 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
struct rt_sigframe __user *frame;
void __user *fp = NULL;
unsigned long uc_flags;
- int err = 0;
+
+ /* x86-64 should always use SA_RESTORER. */
+ if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
+ return -EFAULT;
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
+ uc_flags = frame_uc_flags(regs);
- if (!access_ok(frame, sizeof(*frame)))
+ if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
+ /* Create the ucontext. */
+ unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
+ unsafe_put_user(0, &frame->uc.uc_link, Efault);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault);
+ unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
+ unsafe_put_sigmask(set, frame, Efault);
+ user_access_end();
+
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
}
- uc_flags = frame_uc_flags(regs);
-
- put_user_try {
- /* Create the ucontext. */
- put_user_ex(uc_flags, &frame->uc.uc_flags);
- put_user_ex(0, &frame->uc.uc_link);
- save_altstack_ex(&frame->uc.uc_stack, regs->sp);
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- /* x86-64 should always use SA_RESTORER. */
- if (ksig->ka.sa.sa_flags & SA_RESTORER) {
- put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode);
- } else {
- /* could use a vstub here */
- err |= -EFAULT;
- }
- } put_user_catch(err);
-
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- if (err)
- return -EFAULT;
-
/* Set up registers for signal handler */
regs->di = sig;
/* In case the signal handler was declared without prototypes */
@@ -530,6 +504,10 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
force_valid_ss(regs);
return 0;
+
+Efault:
+ user_access_end();
+ return -EFAULT;
}
#endif /* CONFIG_X86_32 */
@@ -541,44 +519,33 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
struct rt_sigframe_x32 __user *frame;
unsigned long uc_flags;
void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
-
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
+ void __user *fp = NULL;
- if (!access_ok(frame, sizeof(*frame)))
+ if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
return -EFAULT;
- if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
- return -EFAULT;
- }
+ frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
uc_flags = frame_uc_flags(regs);
- put_user_try {
- /* Create the ucontext. */
- put_user_ex(uc_flags, &frame->uc.uc_flags);
- put_user_ex(0, &frame->uc.uc_link);
- compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
- put_user_ex(0, &frame->uc.uc__pad0);
-
- if (ksig->ka.sa.sa_flags & SA_RESTORER) {
- restorer = ksig->ka.sa.sa_restorer;
- } else {
- /* could use a vstub here */
- restorer = NULL;
- err |= -EFAULT;
- }
- put_user_ex(restorer, (unsigned long __user *)&frame->pretcode);
- } put_user_catch(err);
+ if (!user_access_begin(frame, sizeof(*frame)))
+ return -EFAULT;
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ /* Create the ucontext. */
+ unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
+ unsafe_put_user(0, &frame->uc.uc_link, Efault);
+ unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
+ unsafe_put_user(0, &frame->uc.uc__pad0, Efault);
+ restorer = ksig->ka.sa.sa_restorer;
+ unsafe_put_user(restorer, (unsigned long __user *)&frame->pretcode, Efault);
+ unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
+ unsafe_put_sigmask(set, frame, Efault);
+ user_access_end();
- if (err)
- return -EFAULT;
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
+ return -EFAULT;
+ }
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
@@ -597,6 +564,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
#endif /* CONFIG_X86_X32_ABI */
return 0;
+#ifdef CONFIG_X86_X32_ABI
+Efault:
+ user_access_end();
+ return -EFAULT;
+#endif
}
/*
@@ -613,9 +585,8 @@ SYSCALL_DEFINE0(sigreturn)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
- if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
- && __copy_from_user(&set.sig[1], &frame->extramask,
- sizeof(frame->extramask))))
+ if (__get_user(set.sig[0], &frame->sc.oldmask) ||
+ __get_user(set.sig[1], &frame->extramask[0]))
goto badframe;
set_current_blocked(&set);
@@ -645,7 +616,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ if (__get_user(*(__u64 *)&set, (__u64 __user *)&frame->uc.uc_sigmask))
goto badframe;
if (__get_user(uc_flags, &frame->uc.uc_flags))
goto badframe;
@@ -859,7 +830,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}
#ifdef CONFIG_X86_X32_ABI
-asmlinkage long sys32_x32_rt_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
@@ -870,7 +841,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask))
goto badframe;
if (__get_user(uc_flags, &frame->uc.uc_flags))
goto badframe;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 69881b2d446c..fe3ab9632f3b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -147,6 +147,8 @@ static inline void smpboot_restore_warm_reset_vector(void)
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
+static void init_freq_invariance(void);
+
/*
* Report back to the Boot Processor during boot time or to the caller processor
* during CPU online.
@@ -183,6 +185,8 @@ static void smp_callin(void)
*/
set_cpu_sibling_map(raw_smp_processor_id());
+ init_freq_invariance();
+
/*
* Get our bogomips.
* Update loops_per_jiffy in cpu_data. Previous call to
@@ -466,7 +470,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
*/
static const struct x86_cpu_id snc_cpu[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X },
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
{}
};
@@ -1337,7 +1341,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_sched_topology(x86_topology);
set_cpu_sibling_map(0);
-
+ init_freq_invariance();
smp_sanity_check();
switch (apic_intr_mode) {
@@ -1434,7 +1438,7 @@ early_param("possible_cpus", _setup_possible_cpus);
/*
* cpu_possible_mask should be static, it cannot change as cpu's
* are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
+ * are allocated by some modules at init time, and don't expect to
* do this dynamically on cpu arrival/departure.
* cpu_present_mask on the other hand can change dynamically.
* In case when cpu_hotplug is not compiled, then we resort to current
@@ -1764,3 +1768,287 @@ void native_play_dead(void)
}
#endif
+
+/*
+ * APERF/MPERF frequency ratio computation.
+ *
+ * The scheduler wants to do frequency invariant accounting and needs a <1
+ * ratio to account for the 'current' frequency, corresponding to
+ * freq_curr / freq_max.
+ *
+ * Since the frequency freq_curr on x86 is controlled by micro-controller and
+ * our P-state setting is little more than a request/hint, we need to observe
+ * the effective frequency 'BusyMHz', i.e. the average frequency over a time
+ * interval after discarding idle time. This is given by:
+ *
+ * BusyMHz = delta_APERF / delta_MPERF * freq_base
+ *
+ * where freq_base is the max non-turbo P-state.
+ *
+ * The freq_max term has to be set to a somewhat arbitrary value, because we
+ * can't know which turbo states will be available at a given point in time:
+ * it all depends on the thermal headroom of the entire package. We set it to
+ * the turbo level with 4 cores active.
+ *
+ * Benchmarks show that's a good compromise between the 1C turbo ratio
+ * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
+ * which would ignore the entire turbo range (a conspicuous part, making
+ * freq_curr/freq_max always maxed out).
+ *
+ * An exception to the heuristic above is the Atom uarch, where we choose the
+ * highest turbo level for freq_max since Atom's are generally oriented towards
+ * power efficiency.
+ *
+ * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
+ * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
+ */
+
+DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+static DEFINE_PER_CPU(u64, arch_prev_aperf);
+static DEFINE_PER_CPU(u64, arch_prev_mperf);
+static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
+static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
+
+void arch_set_max_freq_ratio(bool turbo_disabled)
+{
+ arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
+ arch_turbo_freq_ratio;
+}
+
+static bool turbo_disabled(void)
+{
+ u64 misc_en;
+ int err;
+
+ err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
+ if (err)
+ return false;
+
+ return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
+}
+
+static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+ int err;
+
+ err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
+ if (err)
+ return false;
+
+ err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
+ *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
+
+ return true;
+}
+
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+
+#define ICPU(model) \
+ {X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0}
+
+static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
+ ICPU(INTEL_FAM6_XEON_PHI_KNL),
+ ICPU(INTEL_FAM6_XEON_PHI_KNM),
+ {}
+};
+
+static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
+ ICPU(INTEL_FAM6_SKYLAKE_X),
+ {}
+};
+
+static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
+ ICPU(INTEL_FAM6_ATOM_GOLDMONT),
+ ICPU(INTEL_FAM6_ATOM_GOLDMONT_D),
+ ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS),
+ {}
+};
+
+static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
+ int num_delta_fratio)
+{
+ int fratio, delta_fratio, found;
+ int err, i;
+ u64 msr;
+
+ if (!x86_match_cpu(has_knl_turbo_ratio_limits))
+ return false;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+ if (err)
+ return false;
+
+ fratio = (msr >> 8) & 0xFF;
+ i = 16;
+ found = 0;
+ do {
+ if (found >= num_delta_fratio) {
+ *turbo_freq = fratio;
+ return true;
+ }
+
+ delta_fratio = (msr >> (i + 5)) & 0x7;
+
+ if (delta_fratio) {
+ found += 1;
+ fratio -= delta_fratio;
+ }
+
+ i += 8;
+ } while (i < 64);
+
+ return true;
+}
+
+static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
+{
+ u64 ratios, counts;
+ u32 group_size;
+ int err, i;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
+ if (err)
+ return false;
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
+ if (err)
+ return false;
+
+ for (i = 0; i < 64; i += 8) {
+ group_size = (counts >> i) & 0xFF;
+ if (group_size >= size) {
+ *turbo_freq = (ratios >> i) & 0xFF;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+ int err;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ if (err)
+ return false;
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+ *turbo_freq = (*turbo_freq >> 24) & 0xFF; /* 4C turbo */
+
+ return true;
+}
+
+static bool intel_set_max_freq_ratio(void)
+{
+ u64 base_freq, turbo_freq;
+
+ if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
+ goto out;
+
+ if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
+ skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+ goto out;
+
+ if (knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+ goto out;
+
+ if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
+ skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
+ goto out;
+
+ if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
+ goto out;
+
+ return false;
+
+out:
+ arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
+ base_freq);
+ arch_set_max_freq_ratio(turbo_disabled());
+ return true;
+}
+
+static void init_counter_refs(void *arg)
+{
+ u64 aperf, mperf;
+
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+
+ this_cpu_write(arch_prev_aperf, aperf);
+ this_cpu_write(arch_prev_mperf, mperf);
+}
+
+static void init_freq_invariance(void)
+{
+ bool ret = false;
+
+ if (smp_processor_id() != 0 || !boot_cpu_has(X86_FEATURE_APERFMPERF))
+ return;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ ret = intel_set_max_freq_ratio();
+
+ if (ret) {
+ on_each_cpu(init_counter_refs, NULL, 1);
+ static_branch_enable(&arch_scale_freq_key);
+ } else {
+ pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
+ }
+}
+
+DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+
+void arch_scale_freq_tick(void)
+{
+ u64 freq_scale;
+ u64 aperf, mperf;
+ u64 acnt, mcnt;
+
+ if (!arch_scale_freq_invariant())
+ return;
+
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+
+ acnt = aperf - this_cpu_read(arch_prev_aperf);
+ mcnt = mperf - this_cpu_read(arch_prev_mperf);
+ if (!mcnt)
+ return;
+
+ this_cpu_write(arch_prev_aperf, aperf);
+ this_cpu_write(arch_prev_mperf, mperf);
+
+ acnt <<= 2*SCHED_CAPACITY_SHIFT;
+ mcnt *= arch_max_freq_ratio;
+
+ freq_scale = div64_u64(acnt, mcnt);
+
+ if (freq_scale > SCHED_CAPACITY_SCALE)
+ freq_scale = SCHED_CAPACITY_SCALE;
+
+ this_cpu_write(arch_freq_scale, freq_scale);
+}
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2d6898c2cb64..6ad43fc44556 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -96,7 +96,8 @@ struct stack_frame_user {
};
static int
-copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
+copy_stack_frame(const struct stack_frame_user __user *fp,
+ struct stack_frame_user *frame)
{
int ret;
@@ -105,7 +106,8 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
ret = 1;
pagefault_disable();
- if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+ if (__get_user(frame->next_fp, &fp->next_fp) ||
+ __get_user(frame->ret_addr, &fp->ret_addr))
ret = 0;
pagefault_enable();
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/kernel/sys_ia32.c
index 21790307121e..ab03fede1422 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/kernel/sys_ia32.c
@@ -51,20 +51,80 @@
#define AA(__x) ((unsigned long)(__x))
-
-COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
- unsigned long, offset_low, unsigned long, offset_high)
+SYSCALL_DEFINE3(ia32_truncate64, const char __user *, filename,
+ unsigned long, offset_low, unsigned long, offset_high)
{
return ksys_truncate(filename,
((loff_t) offset_high << 32) | offset_low);
}
-COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
- unsigned long, offset_low, unsigned long, offset_high)
+SYSCALL_DEFINE3(ia32_ftruncate64, unsigned int, fd,
+ unsigned long, offset_low, unsigned long, offset_high)
{
return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
+/* warning: next two assume little endian */
+SYSCALL_DEFINE5(ia32_pread64, unsigned int, fd, char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
+{
+ return ksys_pread64(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+SYSCALL_DEFINE5(ia32_pwrite64, unsigned int, fd, const char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
+{
+ return ksys_pwrite64(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+
+/*
+ * Some system calls that need sign extended arguments. This could be
+ * done by a generic wrapper.
+ */
+SYSCALL_DEFINE6(ia32_fadvise64_64, int, fd, __u32, offset_low,
+ __u32, offset_high, __u32, len_low, __u32, len_high,
+ int, advice)
+{
+ return ksys_fadvise64_64(fd,
+ (((u64)offset_high)<<32) | offset_low,
+ (((u64)len_high)<<32) | len_low,
+ advice);
+}
+
+SYSCALL_DEFINE4(ia32_readahead, int, fd, unsigned int, off_lo,
+ unsigned int, off_hi, size_t, count)
+{
+ return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
+}
+
+SYSCALL_DEFINE6(ia32_sync_file_range, int, fd, unsigned int, off_low,
+ unsigned int, off_hi, unsigned int, n_low,
+ unsigned int, n_hi, int, flags)
+{
+ return ksys_sync_file_range(fd,
+ ((u64)off_hi << 32) | off_low,
+ ((u64)n_hi << 32) | n_low, flags);
+}
+
+SYSCALL_DEFINE5(ia32_fadvise64, int, fd, unsigned int, offset_lo,
+ unsigned int, offset_hi, size_t, len, int, advice)
+{
+ return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
+ len, advice);
+}
+
+SYSCALL_DEFINE6(ia32_fallocate, int, fd, int, mode,
+ unsigned int, offset_lo, unsigned int, offset_hi,
+ unsigned int, len_lo, unsigned int, len_hi)
+{
+ return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
+ ((u64)len_hi << 32) | len_lo);
+}
+
+#ifdef CONFIG_IA32_EMULATION
/*
* Another set for IA32/LFS -- x86_64 struct stat is different due to
* support for 64bit inode numbers.
@@ -97,7 +157,7 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
return 0;
}
-COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
+COMPAT_SYSCALL_DEFINE2(ia32_stat64, const char __user *, filename,
struct stat64 __user *, statbuf)
{
struct kstat stat;
@@ -108,7 +168,7 @@ COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
return ret;
}
-COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
+COMPAT_SYSCALL_DEFINE2(ia32_lstat64, const char __user *, filename,
struct stat64 __user *, statbuf)
{
struct kstat stat;
@@ -118,7 +178,7 @@ COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
return ret;
}
-COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
+COMPAT_SYSCALL_DEFINE2(ia32_fstat64, unsigned int, fd,
struct stat64 __user *, statbuf)
{
struct kstat stat;
@@ -128,7 +188,7 @@ COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
+COMPAT_SYSCALL_DEFINE4(ia32_fstatat64, unsigned int, dfd,
const char __user *, filename,
struct stat64 __user *, statbuf, int, flag)
{
@@ -156,7 +216,7 @@ struct mmap_arg_struct32 {
unsigned int offset;
};
-COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
+COMPAT_SYSCALL_DEFINE1(ia32_mmap, struct mmap_arg_struct32 __user *, arg)
{
struct mmap_arg_struct32 a;
@@ -170,70 +230,10 @@ COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
a.offset>>PAGE_SHIFT);
}
-/* warning: next two assume little endian */
-COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
- u32, count, u32, poslo, u32, poshi)
-{
- return ksys_pread64(fd, ubuf, count,
- ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
- u32, count, u32, poslo, u32, poshi)
-{
- return ksys_pwrite64(fd, ubuf, count,
- ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-
-/*
- * Some system calls that need sign extended arguments. This could be
- * done by a generic wrapper.
- */
-COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
- __u32, offset_high, __u32, len_low, __u32, len_high,
- int, advice)
-{
- return ksys_fadvise64_64(fd,
- (((u64)offset_high)<<32) | offset_low,
- (((u64)len_high)<<32) | len_low,
- advice);
-}
-
-COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
- unsigned int, off_hi, size_t, count)
-{
- return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
-}
-
-COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
- unsigned int, off_hi, unsigned int, n_low,
- unsigned int, n_hi, int, flags)
-{
- return ksys_sync_file_range(fd,
- ((u64)off_hi << 32) | off_low,
- ((u64)n_hi << 32) | n_low, flags);
-}
-
-COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
- unsigned int, offset_hi, size_t, len, int, advice)
-{
- return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
- len, advice);
-}
-
-COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
- unsigned int, offset_lo, unsigned int, offset_hi,
- unsigned int, len_lo, unsigned int, len_hi)
-{
- return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
- ((u64)len_hi << 32) | len_lo);
-}
-
/*
* The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
*/
-COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
+COMPAT_SYSCALL_DEFINE5(ia32_clone, unsigned long, clone_flags,
unsigned long, newsp, int __user *, parent_tidptr,
unsigned long, tls_val, int __user *, child_tidptr)
{
@@ -252,3 +252,4 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
return _do_fork(&args);
}
+#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index ca3c11a17b5a..504fa5425bce 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,7 +21,6 @@
#include <asm/elf.h>
#include <asm/ia32.h>
-#include <asm/syscalls.h>
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index d8673d8a779b..106e7f87f534 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -62,19 +62,16 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static struct irqaction irq0 = {
- .handler = timer_interrupt,
- .flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
- .name = "timer"
-};
-
static void __init setup_default_timer_irq(void)
{
+ unsigned long flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER;
+
/*
- * Unconditionally register the legacy timer; even without legacy
- * PIC/PIT we need this for the HPET0 in legacy replacement mode.
+ * Unconditionally register the legacy timer interrupt; even
+ * without legacy PIC/PIT we need this for the HPET0 in legacy
+ * replacement mode.
*/
- if (setup_irq(0, &irq0))
+ if (request_irq(0, timer_interrupt, flags, "timer", NULL))
pr_info("Failed to register legacy timer interrupt\n");
}
@@ -122,18 +119,12 @@ void __init time_init(void)
*/
void clocksource_arch_init(struct clocksource *cs)
{
- if (cs->archdata.vclock_mode == VCLOCK_NONE)
+ if (cs->vdso_clock_mode == VDSO_CLOCKMODE_NONE)
return;
- if (cs->archdata.vclock_mode > VCLOCK_MAX) {
- pr_warn("clocksource %s registered with invalid vclock_mode %d. Disabling vclock.\n",
- cs->name, cs->archdata.vclock_mode);
- cs->archdata.vclock_mode = VCLOCK_NONE;
- }
-
if (cs->mask != CLOCKSOURCE_MASK(64)) {
- pr_warn("clocksource %s registered with invalid mask %016llx. Disabling vclock.\n",
+ pr_warn("clocksource %s registered with invalid mask %016llx for VDSO. Disabling VDSO support.\n",
cs->name, cs->mask);
- cs->archdata.vclock_mode = VCLOCK_NONE;
+ cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
}
}
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index be5bc2e47c71..b8810ebbc8ae 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -59,39 +59,29 @@ __setup("cpu0_hotplug", enable_cpu0_hotplug);
*/
int _debug_hotplug_cpu(int cpu, int action)
{
- struct device *dev = get_cpu_device(cpu);
int ret;
if (!cpu_is_hotpluggable(cpu))
return -EINVAL;
- lock_device_hotplug();
-
switch (action) {
case 0:
- ret = cpu_down(cpu);
- if (!ret) {
+ ret = remove_cpu(cpu);
+ if (!ret)
pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu);
- dev->offline = true;
- kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
- } else
+ else
pr_debug("Can't offline CPU%d.\n", cpu);
break;
case 1:
- ret = cpu_up(cpu);
- if (!ret) {
- dev->offline = false;
- kobject_uevent(&dev->kobj, KOBJ_ONLINE);
- } else {
+ ret = add_cpu(cpu);
+ if (ret)
pr_debug("Can't online CPU%d.\n", cpu);
- }
+
break;
default:
ret = -EINVAL;
}
- unlock_device_hotplug();
-
return ret;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6ef00eb6fbb9..d54cffdc7cac 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -46,6 +46,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
+#include <asm/cpu.h>
#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
@@ -242,7 +243,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
{
struct task_struct *tsk = current;
-
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
return;
@@ -288,9 +288,29 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overru
DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS)
DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment)
-DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check)
#undef IP
+dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
+{
+ char *str = "alignment check";
+
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+
+ if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
+ return;
+
+ if (!user_mode(regs))
+ die("Split lock detected\n", regs, error_code);
+
+ local_irq_enable();
+
+ if (handle_user_split_lock(regs, error_code))
+ return;
+
+ do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
+ error_code, BUS_ADRALN, NULL);
+}
+
#ifdef CONFIG_VMAP_STACK
__visible void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
@@ -572,14 +592,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
return;
/*
- * Use ist_enter despite the fact that we don't use an IST stack.
- * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
- * mode or even during context tracking state changes.
+ * Unlike any other non-IST entry, we can be called from a kprobe in
+ * non-CONTEXT_KERNEL kernel mode or even during context tracking
+ * state changes. Make sure that we wake up RCU even if we're coming
+ * from kernel code.
*
- * This means that we can't schedule. That's okay.
+ * This means that we can't schedule even if we came from a
+ * preemptible kernel context. That's okay.
*/
- ist_enter(regs);
+ if (!user_mode(regs)) {
+ rcu_nmi_enter();
+ preempt_disable();
+ }
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
@@ -600,7 +626,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
cond_local_irq_disable(regs);
exit:
- ist_exit(regs);
+ if (!user_mode(regs)) {
+ preempt_enable_no_resched();
+ rcu_nmi_exit();
+ }
}
NOKPROBE_SYMBOL(do_int3);
@@ -862,7 +891,25 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
- cond_local_irq_enable(regs);
+ /*
+ * This addresses a Pentium Pro Erratum:
+ *
+ * PROBLEM: If the APIC subsystem is configured in mixed mode with
+ * Virtual Wire mode implemented through the local APIC, an
+ * interrupt vector of 0Fh (Intel reserved encoding) may be
+ * generated by the local APIC (Int 15). This vector may be
+ * generated upon receipt of a spurious interrupt (an interrupt
+ * which is removed before the system receives the INTA sequence)
+ * instead of the programmed 8259 spurious interrupt vector.
+ *
+ * IMPLICATION: The spurious interrupt vector programmed in the
+ * 8259 is normally handled by an operating system's spurious
+ * interrupt handler. However, a vector of 0Fh is unknown to some
+ * operating systems, which would crash if this erratum occurred.
+ *
+ * In theory this could be limited to 32bit, but the handler is not
+ * hurting and who knows which other CPUs suffer from this.
+ */
}
dotraplinkage void
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7e322e2daaf5..fdd4c1078632 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -477,7 +477,7 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* transition from one expected value to another with a fairly
* high accuracy, and we didn't miss any events. We can thus
* use the TSC value at the transitions to calculate a pretty
- * good value for the TSC frequencty.
+ * good value for the TSC frequency.
*/
static inline int pit_verify_msb(unsigned char val)
{
@@ -1108,17 +1108,24 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
sched_clock_tick_stable();
}
+static int tsc_cs_enable(struct clocksource *cs)
+{
+ vclocks_set_used(VDSO_CLOCKMODE_TSC);
+ return 0;
+}
+
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
static struct clocksource clocksource_tsc_early = {
- .name = "tsc-early",
- .rating = 299,
- .read = read_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ .name = "tsc-early",
+ .rating = 299,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
- .archdata = { .vclock_mode = VCLOCK_TSC },
+ .vdso_clock_mode = VDSO_CLOCKMODE_TSC,
+ .enable = tsc_cs_enable,
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,
@@ -1131,14 +1138,15 @@ static struct clocksource clocksource_tsc_early = {
* been found good.
*/
static struct clocksource clocksource_tsc = {
- .name = "tsc",
- .rating = 300,
- .read = read_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ .name = "tsc",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY,
- .archdata = { .vclock_mode = VCLOCK_TSC },
+ .vdso_clock_mode = VDSO_CLOCKMODE_TSC,
+ .enable = tsc_cs_enable,
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index e0cbe4f2af49..4fec6f3a1858 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -15,18 +15,46 @@
#include <asm/param.h>
#include <asm/tsc.h>
-#define MAX_NUM_FREQS 9
+#define MAX_NUM_FREQS 16 /* 4 bits to select the frequency */
+
+/*
+ * The frequency numbers in the SDM are e.g. 83.3 MHz, which does not contain a
+ * lot of accuracy which leads to clock drift. As far as we know Bay Trail SoCs
+ * use a 25 MHz crystal and Cherry Trail uses a 19.2 MHz crystal, the crystal
+ * is the source clk for a root PLL which outputs 1600 and 100 MHz. It is
+ * unclear if the root PLL outputs are used directly by the CPU clock PLL or
+ * if there is another PLL in between.
+ * This does not matter though, we can model the chain of PLLs as a single PLL
+ * with a quotient equal to the quotients of all PLLs in the chain multiplied.
+ * So we can create a simplified model of the CPU clock setup using a reference
+ * clock of 100 MHz plus a quotient which gets us as close to the frequency
+ * from the SDM as possible.
+ * For the 83.3 MHz example from above this would give us 100 MHz * 5 / 6 =
+ * 83 and 1/3 MHz, which matches exactly what has been measured on actual hw.
+ */
+#define TSC_REFERENCE_KHZ 100000
+
+struct muldiv {
+ u32 multiplier;
+ u32 divider;
+};
/*
* If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be
* read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40].
* Unfortunately some Intel Atom SoCs aren't quite compliant to this,
* so we need manually differentiate SoC families. This is what the
- * field msr_plat does.
+ * field use_msr_plat does.
*/
struct freq_desc {
- u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */
+ bool use_msr_plat;
+ struct muldiv muldiv[MAX_NUM_FREQS];
+ /*
+ * Some CPU frequencies in the SDM do not map to known PLL freqs, in
+ * that case the muldiv array is empty and the freqs array is used.
+ */
u32 freqs[MAX_NUM_FREQS];
+ u32 mask;
};
/*
@@ -35,41 +63,91 @@ struct freq_desc {
* by MSR based on SDM.
*/
static const struct freq_desc freq_desc_pnw = {
- 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 }
+ .use_msr_plat = false,
+ .freqs = { 0, 0, 0, 0, 0, 99840, 0, 83200 },
+ .mask = 0x07,
};
static const struct freq_desc freq_desc_clv = {
- 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 }
+ .use_msr_plat = false,
+ .freqs = { 0, 133200, 0, 0, 0, 99840, 0, 83200 },
+ .mask = 0x07,
};
+/*
+ * Bay Trail SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 000: 100 * 5 / 6 = 83.3333 MHz
+ * 001: 100 * 1 / 1 = 100.0000 MHz
+ * 010: 100 * 4 / 3 = 133.3333 MHz
+ * 011: 100 * 7 / 6 = 116.6667 MHz
+ * 100: 100 * 4 / 5 = 80.0000 MHz
+ */
static const struct freq_desc freq_desc_byt = {
- 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 }
+ .use_msr_plat = true,
+ .muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 },
+ { 4, 5 } },
+ .mask = 0x07,
};
+/*
+ * Cherry Trail SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 0000: 100 * 5 / 6 = 83.3333 MHz
+ * 0001: 100 * 1 / 1 = 100.0000 MHz
+ * 0010: 100 * 4 / 3 = 133.3333 MHz
+ * 0011: 100 * 7 / 6 = 116.6667 MHz
+ * 0100: 100 * 4 / 5 = 80.0000 MHz
+ * 0101: 100 * 14 / 15 = 93.3333 MHz
+ * 0110: 100 * 9 / 10 = 90.0000 MHz
+ * 0111: 100 * 8 / 9 = 88.8889 MHz
+ * 1000: 100 * 7 / 8 = 87.5000 MHz
+ */
static const struct freq_desc freq_desc_cht = {
- 1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 }
+ .use_msr_plat = true,
+ .muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 },
+ { 4, 5 }, { 14, 15 }, { 9, 10 }, { 8, 9 },
+ { 7, 8 } },
+ .mask = 0x0f,
};
+/*
+ * Merriefield SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 0001: 100 * 1 / 1 = 100.0000 MHz
+ * 0010: 100 * 4 / 3 = 133.3333 MHz
+ */
static const struct freq_desc freq_desc_tng = {
- 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 }
+ .use_msr_plat = true,
+ .muldiv = { { 0, 0 }, { 1, 1 }, { 4, 3 } },
+ .mask = 0x07,
};
+/*
+ * Moorefield SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 0000: 100 * 5 / 6 = 83.3333 MHz
+ * 0001: 100 * 1 / 1 = 100.0000 MHz
+ * 0010: 100 * 4 / 3 = 133.3333 MHz
+ * 0011: 100 * 1 / 1 = 100.0000 MHz
+ */
static const struct freq_desc freq_desc_ann = {
- 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 }
+ .use_msr_plat = true,
+ .muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 1, 1 } },
+ .mask = 0x0f,
};
+/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
static const struct freq_desc freq_desc_lgm = {
- 1, { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }
+ .use_msr_plat = true,
+ .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
+ .mask = 0x0f,
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
- INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
- INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
- INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt),
- INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
- INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
- INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
- INTEL_CPU_FAM6(ATOM_AIRMONT_NP, freq_desc_lgm),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, &freq_desc_pnw),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_TABLET,&freq_desc_clv),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &freq_desc_byt),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &freq_desc_tng),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &freq_desc_cht),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &freq_desc_ann),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_NP, &freq_desc_lgm),
{}
};
@@ -81,17 +159,19 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
*/
unsigned long cpu_khz_from_msr(void)
{
- u32 lo, hi, ratio, freq;
+ u32 lo, hi, ratio, freq, tscref;
const struct freq_desc *freq_desc;
const struct x86_cpu_id *id;
+ const struct muldiv *md;
unsigned long res;
+ int index;
id = x86_match_cpu(tsc_msr_cpu_ids);
if (!id)
return 0;
freq_desc = (struct freq_desc *)id->driver_data;
- if (freq_desc->msr_plat) {
+ if (freq_desc->use_msr_plat) {
rdmsr(MSR_PLATFORM_INFO, lo, hi);
ratio = (lo >> 8) & 0xff;
} else {
@@ -101,12 +181,28 @@ unsigned long cpu_khz_from_msr(void)
/* Get FSB FREQ ID */
rdmsr(MSR_FSB_FREQ, lo, hi);
+ index = lo & freq_desc->mask;
+ md = &freq_desc->muldiv[index];
- /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
- freq = freq_desc->freqs[lo & 0x7];
+ /*
+ * Note this also catches cases where the index points to an unpopulated
+ * part of muldiv, in that case the else will set freq and res to 0.
+ */
+ if (md->divider) {
+ tscref = TSC_REFERENCE_KHZ * md->multiplier;
+ freq = DIV_ROUND_CLOSEST(tscref, md->divider);
+ /*
+ * Multiplying by ratio before the division has better
+ * accuracy than just calculating freq * ratio.
+ */
+ res = DIV_ROUND_CLOSEST(tscref * ratio, md->divider);
+ } else {
+ freq = freq_desc->freqs[index];
+ res = freq * ratio;
+ }
- /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
- res = freq * ratio;
+ if (freq == 0)
+ pr_err("Error MSR_FSB_FREQ index %d is unknown\n", index);
#ifdef CONFIG_X86_LOCAL_APIC
lapic_timer_period = (freq * 1000) / HZ;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 32a818764e03..3d3c761eb74a 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -295,7 +295,7 @@ static cycles_t check_tsc_warp(unsigned int timeout)
* But as the TSC is per-logical CPU and can potentially be modified wrongly
* by the bios, TSC sync test for smaller duration should be able
* to catch such errors. Also this will catch the condition where all the
- * cores in the socket doesn't get reset at the same time.
+ * cores in the socket don't get reset at the same time.
*/
static inline unsigned int loop_timeout(int cpu)
{
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 91d55454e702..47a8676c7395 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -98,7 +98,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
- long err = 0;
/*
* This gets called from entry.S with interrupts disabled, but
@@ -114,37 +113,30 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
user = vm86->user_vm86;
- if (!access_ok(user, vm86->vm86plus.is_vm86pus ?
+ if (!user_access_begin(user, vm86->vm86plus.is_vm86pus ?
sizeof(struct vm86plus_struct) :
- sizeof(struct vm86_struct))) {
- pr_alert("could not access userspace vm86 info\n");
- do_exit(SIGSEGV);
- }
-
- put_user_try {
- put_user_ex(regs->pt.bx, &user->regs.ebx);
- put_user_ex(regs->pt.cx, &user->regs.ecx);
- put_user_ex(regs->pt.dx, &user->regs.edx);
- put_user_ex(regs->pt.si, &user->regs.esi);
- put_user_ex(regs->pt.di, &user->regs.edi);
- put_user_ex(regs->pt.bp, &user->regs.ebp);
- put_user_ex(regs->pt.ax, &user->regs.eax);
- put_user_ex(regs->pt.ip, &user->regs.eip);
- put_user_ex(regs->pt.cs, &user->regs.cs);
- put_user_ex(regs->pt.flags, &user->regs.eflags);
- put_user_ex(regs->pt.sp, &user->regs.esp);
- put_user_ex(regs->pt.ss, &user->regs.ss);
- put_user_ex(regs->es, &user->regs.es);
- put_user_ex(regs->ds, &user->regs.ds);
- put_user_ex(regs->fs, &user->regs.fs);
- put_user_ex(regs->gs, &user->regs.gs);
-
- put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
- } put_user_catch(err);
- if (err) {
- pr_alert("could not access userspace vm86 info\n");
- do_exit(SIGSEGV);
- }
+ sizeof(struct vm86_struct)))
+ goto Efault;
+
+ unsafe_put_user(regs->pt.bx, &user->regs.ebx, Efault_end);
+ unsafe_put_user(regs->pt.cx, &user->regs.ecx, Efault_end);
+ unsafe_put_user(regs->pt.dx, &user->regs.edx, Efault_end);
+ unsafe_put_user(regs->pt.si, &user->regs.esi, Efault_end);
+ unsafe_put_user(regs->pt.di, &user->regs.edi, Efault_end);
+ unsafe_put_user(regs->pt.bp, &user->regs.ebp, Efault_end);
+ unsafe_put_user(regs->pt.ax, &user->regs.eax, Efault_end);
+ unsafe_put_user(regs->pt.ip, &user->regs.eip, Efault_end);
+ unsafe_put_user(regs->pt.cs, &user->regs.cs, Efault_end);
+ unsafe_put_user(regs->pt.flags, &user->regs.eflags, Efault_end);
+ unsafe_put_user(regs->pt.sp, &user->regs.esp, Efault_end);
+ unsafe_put_user(regs->pt.ss, &user->regs.ss, Efault_end);
+ unsafe_put_user(regs->es, &user->regs.es, Efault_end);
+ unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
+ unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
+ unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
+ unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end);
+
+ user_access_end();
preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
@@ -159,6 +151,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
lazy_load_gs(vm86->regs32.gs);
regs->pt.ax = retval;
+ return;
+
+Efault_end:
+ user_access_end();
+Efault:
+ pr_alert("could not access userspace vm86 info\n");
+ do_exit(SIGSEGV);
}
static void mark_screen_rdonly(struct mm_struct *mm)
@@ -243,6 +242,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
struct kernel_vm86_regs vm86regs;
struct pt_regs *regs = current_pt_regs();
unsigned long err = 0;
+ struct vm86_struct v;
err = security_mmap_addr(0);
if (err) {
@@ -278,39 +278,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
if (vm86->saved_sp0)
return -EPERM;
- if (!access_ok(user_vm86, plus ?
- sizeof(struct vm86_struct) :
- sizeof(struct vm86plus_struct)))
+ if (copy_from_user(&v, user_vm86,
+ offsetof(struct vm86_struct, int_revectored)))
return -EFAULT;
memset(&vm86regs, 0, sizeof(vm86regs));
- get_user_try {
- unsigned short seg;
- get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
- get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
- get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
- get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
- get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
- get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
- get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
- get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
- get_user_ex(seg, &user_vm86->regs.cs);
- vm86regs.pt.cs = seg;
- get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
- get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
- get_user_ex(seg, &user_vm86->regs.ss);
- vm86regs.pt.ss = seg;
- get_user_ex(vm86regs.es, &user_vm86->regs.es);
- get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
- get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
- get_user_ex(vm86regs.gs, &user_vm86->regs.gs);
-
- get_user_ex(vm86->flags, &user_vm86->flags);
- get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
- get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
- } get_user_catch(err);
- if (err)
- return err;
+
+ vm86regs.pt.bx = v.regs.ebx;
+ vm86regs.pt.cx = v.regs.ecx;
+ vm86regs.pt.dx = v.regs.edx;
+ vm86regs.pt.si = v.regs.esi;
+ vm86regs.pt.di = v.regs.edi;
+ vm86regs.pt.bp = v.regs.ebp;
+ vm86regs.pt.ax = v.regs.eax;
+ vm86regs.pt.ip = v.regs.eip;
+ vm86regs.pt.cs = v.regs.cs;
+ vm86regs.pt.flags = v.regs.eflags;
+ vm86regs.pt.sp = v.regs.esp;
+ vm86regs.pt.ss = v.regs.ss;
+ vm86regs.es = v.regs.es;
+ vm86regs.ds = v.regs.ds;
+ vm86regs.fs = v.regs.fs;
+ vm86regs.gs = v.regs.gs;
+
+ vm86->flags = v.flags;
+ vm86->screen_bitmap = v.screen_bitmap;
+ vm86->cpu_type = v.cpu_type;
if (copy_from_user(&vm86->int_revectored,
&user_vm86->int_revectored,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e3296aa028fe..1bf7e312361f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -21,6 +21,7 @@
#define LOAD_OFFSET __START_KERNEL_map
#endif
+#define RUNTIME_DISCARD_EXIT
#define EMITS_PT_NOTE
#define RO_EXCEPTION_TABLE_ALIGN 16
@@ -313,8 +314,8 @@ SECTIONS
. = ALIGN(8);
/*
- * .exit.text is discard at runtime, not link time, to deal with
- * references from .altinstructions and .eh_frame
+ * .exit.text is discarded at runtime, not link time, to deal with
+ * references from .altinstructions
*/
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
EXIT_TEXT
@@ -412,9 +413,6 @@ SECTIONS
DWARF_DEBUG
DISCARDS
- /DISCARD/ : {
- *(.eh_frame)
- }
}
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 991019d5eee1..9fea0757db92 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -59,6 +59,19 @@ config KVM
If unsure, say N.
+config KVM_WERROR
+ bool "Compile KVM with -Werror"
+ # KASAN may cause the build to fail due to larger frames
+ default y if X86_64 && !KASAN
+ # We use the dependency on !COMPILE_TEST to not be enabled
+ # blindly in allmodconfig or allyesconfig configurations
+ depends on (X86_64 && !KASAN) || !COMPILE_TEST
+ depends on EXPERT
+ help
+ Add -Werror to the build flags for KVM.
+
+ If in doubt, say "N".
+
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b19ef421084d..e553f0fdd87d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y += -Iarch/x86/kvm
+ccflags-$(CONFIG_KVM_WERROR) += -Werror
KVM := ../../../virt/kvm
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddbc61984227..bc00642e5d3b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -191,25 +191,6 @@
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
-/*
- * fastop functions have a special calling convention:
- *
- * dst: rax (in/out)
- * src: rdx (in/out)
- * src2: rcx (in)
- * flags: rflags (in/out)
- * ex: rsi (in:fastop pointer, out:zero if exception)
- *
- * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
- * different operand sizes can be reached by calculation, rather than a jump
- * table (which would be bigger than the code).
- *
- * fastop functions are declared as taking a never-defined fastop parameter,
- * so they can't be called from C directly.
- */
-
-struct fastop;
-
struct opcode {
u64 flags : 56;
u64 intercept : 8;
@@ -311,8 +292,19 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
#define ON64(x)
#endif
-typedef void (*fastop_t)(struct fastop *);
-
+/*
+ * fastop functions have a special calling convention:
+ *
+ * dst: rax (in/out)
+ * src: rdx (in/out)
+ * src2: rcx (in)
+ * flags: rflags (in/out)
+ * ex: rsi (in:fastop pointer, out:zero if exception)
+ *
+ * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
+ * different operand sizes can be reached by calculation, rather than a jump
+ * table (which would be bigger than the code).
+ */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define __FOP_FUNC(name) \
@@ -5181,6 +5173,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
ctxt->fetch.ptr = ctxt->fetch.data;
ctxt->fetch.end = ctxt->fetch.data + insn_len;
ctxt->opcode_len = 1;
+ ctxt->intercept = x86_intercept_none;
if (insn_len > 0)
memcpy(ctxt->fetch.data, insn, insn_len);
else {
@@ -5683,7 +5676,7 @@ special_insn:
if (ctxt->execute) {
if (ctxt->d & Fastop)
- rc = fastop(ctxt, (fastop_t)ctxt->execute);
+ rc = fastop(ctxt, ctxt->fop);
else
rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 7668fed1ce65..750ff0b29404 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -378,12 +378,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.delivery_mode == APIC_DM_FIXED) {
struct kvm_lapic_irq irq;
- irq.shorthand = APIC_DEST_NOSHORT;
irq.vector = e->fields.vector;
irq.delivery_mode = e->fields.delivery_mode << 8;
- irq.dest_id = e->fields.dest_id;
irq.dest_mode =
kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
+ irq.level = false;
+ irq.trig_mode = e->fields.trig_mode;
+ irq.shorthand = APIC_DEST_NOSHORT;
+ irq.dest_id = e->fields.dest_id;
+ irq.msi_redir_hint = false;
bitmap_zero(&vcpu_bitmap, 16);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 79afa0bb5f41..c47d2acec529 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -417,7 +417,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
- if (irq.level &&
+ if (irq.trig_mode &&
kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode))
__set_bit(irq.vector, ioapic_handled_vectors);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index eafc631d305c..7356a56e6282 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -627,9 +627,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
- if (pv_eoi_get_user(vcpu, &val) < 0)
+ if (pv_eoi_get_user(vcpu, &val) < 0) {
printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ return false;
+ }
return val & 0x1;
}
@@ -1046,11 +1048,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
+ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
-
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -1080,9 +1079,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
result = 1;
/* assumes that there are only KVM_APIC_INIT/SIPI */
apic->pending_events = (1UL << KVM_APIC_INIT);
- /* make sure pending_events is visible before sending
- * the request */
- smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -1449,6 +1445,8 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
}
}
+static void cancel_hv_timer(struct kvm_lapic *apic);
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1458,6 +1456,10 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -1719,7 +1721,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d55674f44a18..a647601c9e1c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -102,6 +102,19 @@ static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
kvm_get_active_pcid(vcpu));
}
+int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ bool prefault);
+
+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u32 err, bool prefault)
+{
+#ifdef CONFIG_RETPOLINE
+ if (likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault))
+ return kvm_tdp_page_fault(vcpu, cr2_or_gpa, err, prefault);
+#endif
+ return vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, err, prefault);
+}
+
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7011a4e54866..87e9ba27ada1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4219,8 +4219,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
- bool prefault)
+int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ bool prefault)
{
int max_level;
@@ -4925,7 +4925,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
return;
context->mmu_role.as_u64 = new_role.as_u64;
- context->page_fault = tdp_page_fault;
+ context->page_fault = kvm_tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@@ -5436,9 +5436,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
}
if (r == RET_PF_INVALID) {
- r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
- lower_32_bits(error_code),
- false);
+ r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
+ lower_32_bits(error_code), false);
WARN_ON(r == RET_PF_INVALID);
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 4e1ef0473663..21a3320f166a 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -33,7 +33,7 @@
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
#define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
- #define PT_MAX_FULL_LEVELS 4
+ #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#define CMPXCHG cmpxchg
#else
#define CMPXCHG cmpxchg64
@@ -400,7 +400,7 @@ retry_walk:
goto error;
ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
- if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
+ if (unlikely(__get_user(pte, ptep_user)))
goto error;
walker->ptep_user[walker->level - 1] = ptep_user;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 3c6522b84ff1..ffcd96fc02d0 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -339,7 +339,7 @@ TRACE_EVENT(
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
- __field(u8, u)
+ __field(signed char, u)
),
TP_fast_assign(
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a3e32d61d60c..216364cb65a3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -48,6 +48,7 @@
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>
+#include <asm/cpu_device_id.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -57,11 +58,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+#ifdef MODULE
static const struct x86_cpu_id svm_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_SVM),
+ X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
+#endif
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
@@ -1005,33 +1008,32 @@ static void svm_cpu_uninit(int cpu)
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd;
- int r;
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- r = -ENOMEM;
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
- goto err_1;
+ goto free_cpu_data;
if (svm_sev_enabled()) {
- r = -ENOMEM;
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
sizeof(void *),
GFP_KERNEL);
if (!sd->sev_vmcbs)
- goto err_1;
+ goto free_save_area;
}
per_cpu(svm_data, cpu) = sd;
return 0;
-err_1:
+free_save_area:
+ __free_page(sd->save_area);
+free_cpu_data:
kfree(sd);
- return r;
+ return -ENOMEM;
}
@@ -1350,6 +1352,24 @@ static __init void svm_adjust_mmio_mask(void)
kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
+static void svm_hardware_teardown(void)
+{
+ int cpu;
+
+ if (svm_sev_enabled()) {
+ bitmap_free(sev_asid_bitmap);
+ bitmap_free(sev_reclaim_asid_bitmap);
+
+ sev_flush_asids();
+ }
+
+ for_each_possible_cpu(cpu)
+ svm_cpu_uninit(cpu);
+
+ __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
+ iopm_base = 0;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1463,29 +1483,10 @@ static __init int svm_hardware_setup(void)
return 0;
err:
- __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
- iopm_base = 0;
+ svm_hardware_teardown();
return r;
}
-static __exit void svm_hardware_unsetup(void)
-{
- int cpu;
-
- if (svm_sev_enabled()) {
- bitmap_free(sev_asid_bitmap);
- bitmap_free(sev_reclaim_asid_bitmap);
-
- sev_flush_asids();
- }
-
- for_each_possible_cpu(cpu)
- svm_cpu_uninit(cpu);
-
- __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
- iopm_base = 0;
-}
-
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@@ -1933,14 +1934,6 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
static void __unregister_enc_region_locked(struct kvm *kvm,
struct enc_region *region)
{
- /*
- * The guest may change the memory encryption attribute from C=0 -> C=1
- * or vice versa for this memory range. Lets make sure caches are
- * flushed to ensure that guest data gets written into memory with
- * correct C-bit.
- */
- sev_clflush_pages(region->pages, region->npages);
-
sev_unpin_memory(kvm, region->pages, region->npages);
list_del(&region->list);
kfree(region);
@@ -1971,6 +1964,13 @@ static void sev_vm_destroy(struct kvm *kvm)
mutex_lock(&kvm->lock);
/*
+ * Ensure that all guest tagged cache entries are flushed before
+ * releasing the pages back to the system for use. CLFLUSH will
+ * not do this, so issue a WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
+ /*
* if userspace was terminated before unregistering the memory regions
* then lets unpin all the registered memory.
*/
@@ -2175,7 +2175,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
- vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
svm->virt_spec_ctrl = 0;
@@ -2197,8 +2196,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static int avic_init_vcpu(struct vcpu_svm *svm)
{
int ret;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
- if (!kvm_vcpu_apicv_active(&svm->vcpu))
+ if (!avic || !irqchip_in_kernel(vcpu->kvm))
return 0;
ret = avic_init_backing_page(&svm->vcpu);
@@ -2266,6 +2266,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
init_vmcb(svm);
svm_init_osvw(vcpu);
+ vcpu->arch.microcode_version = 0x01000065;
return 0;
@@ -5232,6 +5233,9 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
bool activated = kvm_vcpu_apicv_active(vcpu);
+ if (!avic)
+ return;
+
if (activated) {
/**
* During AVIC temporary deactivation, guest could update
@@ -5255,8 +5259,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
+ if (!vcpu->arch.apicv_active)
+ return -1;
+
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@@ -5268,6 +5275,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
+
+ return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
@@ -6303,7 +6312,8 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion *exit_fastpath)
{
if (!is_guest_mode(vcpu) &&
- to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
+ to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
@@ -7148,6 +7158,9 @@ static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
if (!svm_sev_enabled())
return -ENOTTY;
+ if (!argp)
+ return 0;
+
if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
return -EFAULT;
@@ -7275,6 +7288,13 @@ static int svm_unregister_enc_region(struct kvm *kvm,
goto failed;
}
+ /*
+ * Ensure that all guest tagged cache entries are flushed before
+ * releasing the pages back to the system for use. CLFLUSH will
+ * not do this, so issue a WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
__unregister_enc_region_locked(kvm, region);
mutex_unlock(&kvm->lock);
@@ -7378,7 +7398,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
- .hardware_unsetup = svm_hardware_unsetup,
+ .hardware_unsetup = svm_hardware_teardown,
.check_processor_compatibility = svm_check_processor_compat,
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
@@ -7433,6 +7453,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
+ .update_emulated_instruction = NULL,
.set_interrupt_shadow = svm_set_interrupt_shadow,
.get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index f194dd058470..cef5a344fedb 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -815,8 +815,8 @@ TRACE_EVENT(kvm_write_tsc_offset,
#ifdef CONFIG_X86_64
#define host_clocks \
- {VCLOCK_NONE, "none"}, \
- {VCLOCK_TSC, "tsc"} \
+ {VDSO_CLOCKMODE_NONE, "none"}, \
+ {VDSO_CLOCKMODE_TSC, "tsc"} \
TRACE_EVENT(kvm_update_master_clock,
TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7071af..f486e2606247 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 657c2eda357c..9750e590c89d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -224,7 +224,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
return;
kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
- vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_vmptr = 0;
vmx->nested.hv_evmcs = NULL;
}
@@ -544,7 +544,8 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
-static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
+{
int msr;
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
@@ -1922,7 +1923,8 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
return 1;
- if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
+ if (unlikely(!vmx->nested.hv_evmcs ||
+ evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
if (!vmx->nested.hv_evmcs)
vmx->nested.current_vmptr = -1ull;
@@ -1981,7 +1983,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
/*
- * Clean fields data can't de used on VMLAUNCH and when we switch
+ * Clean fields data can't be used on VMLAUNCH and when we switch
* between different L2 guests as KVM keeps a single VMCS12 per L1.
*/
if (from_launch || evmcs_gpa_changed)
@@ -3160,10 +3162,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
*
* Returns:
- * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
- * NVMX_ENTRY_VMFAIL: Consistency check VMFail
- * NVMX_ENTRY_VMEXIT: Consistency check VMExit
- * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
+ * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
+ * NVMX_VMENTRY_VMFAIL: Consistency check VMFail
+ * NVMX_VMENTRY_VMEXIT: Consistency check VMExit
+ * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry)
@@ -3575,25 +3577,80 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
}
+/*
+ * Returns true if a debug trap is pending delivery.
+ *
+ * In KVM, debug traps bear an exception payload. As such, the class of a #DB
+ * exception may be inferred from the presence of an exception payload.
+ */
+static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.exception.pending &&
+ vcpu->arch.exception.nr == DB_VECTOR &&
+ vcpu->arch.exception.payload;
+}
+
+/*
+ * Certain VM-exits set the 'pending debug exceptions' field to indicate a
+ * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
+ * represents these debug traps with a payload that is said to be compatible
+ * with the 'pending debug exceptions' field, write the payload to the VMCS
+ * field if a VM-exit is delivered before the debug trap.
+ */
+static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
+{
+ if (vmx_pending_dbg_trap(vcpu))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vcpu->arch.exception.payload);
+}
+
static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
bool block_nested_events =
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
+ bool mtf_pending = vmx->nested.mtf_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
+ /*
+ * Clear the MTF state. If a higher priority VM-exit is delivered first,
+ * this state is discarded.
+ */
+ vmx->nested.mtf_pending = false;
+
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
clear_bit(KVM_APIC_INIT, &apic->pending_events);
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
return 0;
}
+ /*
+ * Process any exceptions that are not debug traps before MTF.
+ */
+ if (vcpu->arch.exception.pending &&
+ !vmx_pending_dbg_trap(vcpu) &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+ return 0;
+ }
+
+ if (mtf_pending) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
+ nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
+ return 0;
+ }
+
if (vcpu->arch.exception.pending &&
- nested_vmx_check_exception(vcpu, &exit_qual)) {
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
@@ -5256,24 +5313,17 @@ fail:
return 1;
}
-
-static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+/*
+ * Return true if an IO instruction with the specified port and size should cause
+ * a VM-exit into L1.
+ */
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size)
{
- unsigned long exit_qualification;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gpa_t bitmap, last_bitmap;
- unsigned int port;
- int size;
u8 b;
- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
- return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
-
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-
- port = exit_qualification >> 16;
- size = (exit_qualification & 7) + 1;
-
last_bitmap = (gpa_t)-1;
b = -1;
@@ -5300,8 +5350,26 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
return false;
}
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ unsigned long exit_qualification;
+ unsigned short port;
+ int size;
+
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
+
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ port = exit_qualification >> 16;
+ size = (exit_qualification & 7) + 1;
+
+ return nested_vmx_check_io_bitmaps(vcpu, port, size);
+}
+
/*
- * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
+ * Return 1 if we should exit from L2 to L1 to handle an MSR access,
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
* disinterest in the current event (read or write a specific MSR) by using an
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
@@ -5683,6 +5751,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+
+ if (vmx->nested.mtf_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
}
}
@@ -5863,6 +5934,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmx->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ vmx->nested.mtf_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
+
ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
@@ -5920,8 +5994,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5948,7 +6021,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4ead0f..9aeda46f473e 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
@@ -34,6 +33,8 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
@@ -175,6 +176,11 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline int nested_cpu_has_mtf(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9a6664886f2e..458e684dfbdc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/asm.h>
#include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
@@ -41,6 +42,7 @@
#include <asm/mce.h>
#include <asm/mmu_context.h>
#include <asm/mshyperv.h>
+#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include <asm/vmx.h>
@@ -64,11 +66,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+#ifdef MODULE
static const struct x86_cpu_id vmx_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_VMX),
+ X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+#endif
bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
@@ -95,7 +99,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -1175,6 +1179,10 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->guest_msrs[i].mask);
}
+
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
+ nested_sync_vmcs12_to_shadow(vcpu);
+
if (vmx->guest_state_loaded)
return;
@@ -1599,6 +1607,40 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
return 1;
}
+
+/*
+ * Recognizes a pending MTF VM-exit and records the nested state for later
+ * delivery.
+ */
+static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!is_guest_mode(vcpu))
+ return;
+
+ /*
+ * Per the SDM, MTF takes priority over debug-trap exceptions besides
+ * T-bit traps. As instruction emulation is completed (i.e. at the
+ * instruction boundary), any #DB exception pending delivery must be a
+ * debug-trap. Record the pending MTF state to be delivered in
+ * vmx_check_nested_events().
+ */
+ if (nested_cpu_has_mtf(vmcs12) &&
+ (!vcpu->arch.exception.pending ||
+ vcpu->arch.exception.nr == DB_VECTOR))
+ vmx->nested.mtf_pending = true;
+ else
+ vmx->nested.mtf_pending = false;
+}
+
+static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ vmx_update_emulated_instruction(vcpu);
+ return skip_emulated_instruction(vcpu);
+}
+
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
/*
@@ -2298,6 +2340,17 @@ static void hardware_disable(void)
kvm_cpu_vmxoff();
}
+/*
+ * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
+ * directly instead of going through cpu_has(), to ensure KVM is trapping
+ * ENCLS whenever it's supported in hardware. It does not matter whether
+ * the host OS supports or has enabled SGX.
+ */
+static bool cpu_has_sgx(void)
+{
+ return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
+}
+
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
@@ -2378,8 +2431,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_PT_USE_GPA |
SECONDARY_EXEC_PT_CONCEAL_VMX |
- SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_ENCLS_EXITING;
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+ if (cpu_has_sgx())
+ opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -2947,6 +3001,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static int get_ept_level(struct kvm_vcpu *vcpu)
{
+ /* Nested EPT currently only supports 4-level walks. */
+ if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
+ return 4;
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
@@ -3815,24 +3872,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
* 2. If target vcpu isn't running(root mode), kick it to pick up the
* interrupt from PIR in next vmentry.
*/
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int r;
r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
if (!r)
- return;
+ return 0;
+
+ if (!vcpu->arch.apicv_active)
+ return -1;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
- return;
+ return 0;
/* If a previous notification has sent the IPI, nothing to do. */
if (pi_test_and_set_on(&vmx->pi_desc))
- return;
+ return 0;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
+
+ return 0;
}
/*
@@ -4238,7 +4300,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->msr_ia32_umwait_control = 0;
- vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
@@ -6228,7 +6289,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
#endif
ASM_CALL_CONSTRAINT
:
- THUNK_TARGET(entry),
+ [thunk_target]"r"(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
@@ -6480,8 +6541,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.need_vmcs12_to_shadow_sync)
- nested_sync_vmcs12_to_shadow(vcpu);
+ /*
+ * We did this in prepare_switch_to_guest, because it needs to
+ * be within srcu_read_lock.
+ */
+ WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -6755,14 +6819,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
/*
@@ -6836,8 +6900,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7098,6 +7161,40 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->req_immediate_exit = true;
}
+static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ unsigned short port;
+ bool intercept;
+ int size;
+
+ if (info->intercept == x86_intercept_in ||
+ info->intercept == x86_intercept_ins) {
+ port = info->src_val;
+ size = info->dst_bytes;
+ } else {
+ port = info->dst_val;
+ size = info->src_bytes;
+ }
+
+ /*
+ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
+ * VM-exits depend on the 'unconditional IO exiting' VM-execution
+ * control.
+ *
+ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
+ */
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ intercept = nested_cpu_has(vmcs12,
+ CPU_BASED_UNCOND_IO_EXITING);
+ else
+ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
+
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
+}
+
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -7105,19 +7202,45 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ switch (info->intercept) {
/*
* RDPID causes #UD if disabled through secondary execution controls.
* Because it is marked as EmulateOnUD, we need to intercept it here.
*/
- if (info->intercept == x86_intercept_rdtscp &&
- !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
- ctxt->exception.vector = UD_VECTOR;
- ctxt->exception.error_code_valid = false;
- return X86EMUL_PROPAGATE_FAULT;
- }
+ case x86_intercept_rdtscp:
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->exception.error_code_valid = false;
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ break;
+
+ case x86_intercept_in:
+ case x86_intercept_ins:
+ case x86_intercept_out:
+ case x86_intercept_outs:
+ return vmx_check_intercept_io(vcpu, info);
+
+ case x86_intercept_lgdt:
+ case x86_intercept_lidt:
+ case x86_intercept_lldt:
+ case x86_intercept_ltr:
+ case x86_intercept_sgdt:
+ case x86_intercept_sidt:
+ case x86_intercept_sldt:
+ case x86_intercept_str:
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
+ return X86EMUL_CONTINUE;
+
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ break;
/* TODO: check more intercepts... */
- return X86EMUL_CONTINUE;
+ default:
+ break;
+ }
+
+ return X86EMUL_UNHANDLEABLE;
}
#ifdef CONFIG_X86_64
@@ -7699,7 +7822,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
@@ -7783,7 +7906,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
- .skip_emulated_instruction = skip_emulated_instruction,
+ .skip_emulated_instruction = vmx_skip_emulated_instruction,
+ .update_emulated_instruction = vmx_update_emulated_instruction,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
.patch_hypercall = vmx_patch_hypercall,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f42cf3dcd70..0695ea177e22 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,8 +14,6 @@
extern const u32 vmx_msr_index[];
extern u64 host_efer;
-extern u32 get_umwait_control_msr(void);
-
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3
@@ -150,6 +148,9 @@ struct nested_vmx {
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
+ /* Pending MTF VM-exit into L1. */
+ bool mtf_pending;
+
struct loaded_vmcs vmcs02;
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fbabb2f06273..bf8564d73fc3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -438,6 +438,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
* for #DB exceptions under VMX.
*/
vcpu->arch.dr6 ^= payload & DR6_RTM;
+
+ /*
+ * The #DB payload is defined as compatible with the 'pending
+ * debug exceptions' field under VMX, not DR6. While bit 12 is
+ * defined in the 'pending debug exceptions' field (enabled
+ * breakpoint), it is reserved and must be zero in DR6.
+ */
+ vcpu->arch.dr6 &= ~BIT(12);
break;
case PF_VECTOR:
vcpu->arch.cr2 = payload;
@@ -490,19 +498,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = has_payload;
vcpu->arch.exception.payload = payload;
- /*
- * In guest mode, payload delivery should be deferred,
- * so that the L1 hypervisor can intercept #PF before
- * CR2 is modified (or intercept #DB before DR6 is
- * modified under nVMX). However, for ABI
- * compatibility with KVM_GET_VCPU_EVENTS and
- * KVM_SET_VCPU_EVENTS, we can't delay payload
- * delivery unless userspace has enabled this
- * functionality via the per-VM capability,
- * KVM_CAP_EXCEPTION_PAYLOAD.
- */
- if (!vcpu->kvm->arch.exception_payload_enabled ||
- !is_guest_mode(vcpu))
+ if (!is_guest_mode(vcpu))
kvm_deliver_exception_payload(vcpu);
return;
}
@@ -1558,7 +1554,10 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
*/
static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
{
- if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
+ return 1;
+
+ if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
@@ -1635,7 +1634,7 @@ static void update_pvclock_gtod(struct timekeeper *tk)
write_seqcount_begin(&vdata->seq);
/* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
vdata->clock.mask = tk->tkr_mono.mask;
vdata->clock.mult = tk->tkr_mono.mult;
@@ -1643,7 +1642,7 @@ static void update_pvclock_gtod(struct timekeeper *tk)
vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
vdata->clock.offset = tk->tkr_mono.base;
- vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode;
+ vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
vdata->raw_clock.mask = tk->tkr_raw.mask;
vdata->raw_clock.mult = tk->tkr_raw.mult;
@@ -1844,7 +1843,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
static inline int gtod_is_based_on_tsc(int mode)
{
- return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
+ return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
@@ -1937,7 +1936,7 @@ static inline bool kvm_check_tsc_unstable(void)
* TSC is marked unstable when we're running on Hyper-V,
* 'TSC page' clocksource is good.
*/
- if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
+ if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
return false;
#endif
return check_tsc_unstable();
@@ -2092,30 +2091,30 @@ static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
u64 tsc_pg_val;
switch (clock->vclock_mode) {
- case VCLOCK_HVCLOCK:
+ case VDSO_CLOCKMODE_HVCLOCK:
tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
tsc_timestamp);
if (tsc_pg_val != U64_MAX) {
/* TSC page valid */
- *mode = VCLOCK_HVCLOCK;
+ *mode = VDSO_CLOCKMODE_HVCLOCK;
v = (tsc_pg_val - clock->cycle_last) &
clock->mask;
} else {
/* TSC page invalid */
- *mode = VCLOCK_NONE;
+ *mode = VDSO_CLOCKMODE_NONE;
}
break;
- case VCLOCK_TSC:
- *mode = VCLOCK_TSC;
+ case VDSO_CLOCKMODE_TSC:
+ *mode = VDSO_CLOCKMODE_TSC;
*tsc_timestamp = read_tsc();
v = (*tsc_timestamp - clock->cycle_last) &
clock->mask;
break;
default:
- *mode = VCLOCK_NONE;
+ *mode = VDSO_CLOCKMODE_NONE;
}
- if (*mode == VCLOCK_NONE)
+ if (*mode == VDSO_CLOCKMODE_NONE)
*tsc_timestamp = v = 0;
return v * clock->mult;
@@ -2448,7 +2447,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
- WARN_ON(vcpu->hv_clock.system_time < 0);
/* If the host uses TSC clocksource, then it is stable */
pvclock_flags = 0;
@@ -3796,6 +3794,21 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
process_nmi(vcpu);
/*
+ * In guest mode, payload delivery should be deferred,
+ * so that the L1 hypervisor can intercept #PF before
+ * CR2 is modified (or intercept #DB before DR6 is
+ * modified under nVMX). Unless the per-VM capability,
+ * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of
+ * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we
+ * opportunistically defer the exception payload, deliver it if the
+ * capability hasn't been requested before processing a
+ * KVM_GET_VCPU_EVENTS.
+ */
+ if (!vcpu->kvm->arch.exception_payload_enabled &&
+ vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
+ kvm_deliver_exception_payload(vcpu);
+
+ /*
* The API doesn't provide the instruction length for software
* exceptions, so don't report them. As long as the guest RIP
* isn't advanced, we should expect to encounter the exception
@@ -6880,6 +6893,8 @@ restart:
kvm_rip_write(vcpu, ctxt->eip);
if (r && ctxt->tf)
r = kvm_vcpu_do_singlestep(vcpu);
+ if (kvm_x86_ops->update_emulated_instruction)
+ kvm_x86_ops->update_emulated_instruction(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}
@@ -7177,14 +7192,16 @@ static void kvm_timer_init(void)
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
- struct cpufreq_policy policy;
+ struct cpufreq_policy *policy;
int cpu;
- memset(&policy, 0, sizeof(policy));
cpu = get_cpu();
- cpufreq_get_policy(&policy, cpu);
- if (policy.cpuinfo.max_freq)
- max_tsc_khz = policy.cpuinfo.max_freq;
+ policy = cpufreq_cpu_get(cpu);
+ if (policy) {
+ if (policy->cpuinfo.max_freq)
+ max_tsc_khz = policy->cpuinfo.max_freq;
+ cpufreq_cpu_put(policy);
+ }
put_cpu();
#endif
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
@@ -7295,12 +7312,12 @@ int kvm_arch_init(void *opaque)
}
if (!ops->cpu_has_kvm_support()) {
- printk(KERN_ERR "kvm: no hardware support\n");
+ pr_err_ratelimited("kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
- printk(KERN_ERR "kvm: disabled by bios\n");
+ pr_err_ratelimited("kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
@@ -8942,7 +8959,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
@@ -10182,7 +10198,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;
- vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
+ kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
}
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 53adc1762ec0..ec31f5b60323 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -366,7 +366,7 @@ AVXcode: 1
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c: Grp20 (1A),(1C)
1d:
-1e:
+1e: Grp21 (1A)
1f: NOP Ev
# 0x0f 0x20-0x2f
20: MOV Rd,Cd
@@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
f2: ANDN Gy,By,Ey (v)
f3: Grp17 (1A)
-f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
-f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
f9: MOVDIRI My,Gy
@@ -970,7 +970,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -1041,8 +1041,8 @@ GrpTable: Grp15
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
-5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
+5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@@ -1077,6 +1077,11 @@ GrpTable: Grp20
0: cldemote Mb
EndTable
+GrpTable: Grp21
+1: RDSSPD/Q Ry (F3),(11B)
+7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B)
+EndTable
+
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 64229dad7eab..69309cd56fdf 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -363,13 +363,8 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
{
const struct ptdump_range ptdump_ranges[] = {
#ifdef CONFIG_X86_64
-
-#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
-#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \
- normalize_addr_shift)
-
{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
- {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
+ {GUARD_HOLE_END_ADDR, ~0UL},
#else
{0, ~0UL},
#endif
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 30bb0bd3b1b8..b991aa4bdfae 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -80,18 +80,6 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
-__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
-{
- /* Special hack for uaccess_err */
- current->thread.uaccess_err = 1;
- regs->ip = ex_fixup_addr(fixup);
- return true;
-}
-EXPORT_SYMBOL(ex_handler_ext);
-
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fa4ea09593ab..629fdf13f846 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-void vmalloc_sync_all(void)
+static void vmalloc_sync(void)
{
unsigned long address;
@@ -217,6 +217,16 @@ void vmalloc_sync_all(void)
}
}
+void vmalloc_sync_mappings(void)
+{
+ vmalloc_sync();
+}
+
+void vmalloc_sync_unmappings(void)
+{
+ vmalloc_sync();
+}
+
/*
* 32-bit:
*
@@ -319,11 +329,23 @@ out:
#else /* CONFIG_X86_64: */
-void vmalloc_sync_all(void)
+void vmalloc_sync_mappings(void)
{
+ /*
+ * 64-bit mappings might allocate new p4d/pud pages
+ * that need to be propagated to all tasks' PGDs.
+ */
sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
+void vmalloc_sync_unmappings(void)
+{
+ /*
+ * Unmappings never allocate or free p4d/pud pages.
+ * No work is required here.
+ */
+}
+
/*
* 64-bit:
*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 23df4885bbed..8ae0272c1c51 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -788,44 +788,6 @@ void __init mem_init(void)
x86_init.hyper.init_after_bootmem();
mem_init_print_info(NULL);
- printk(KERN_INFO "virtual kernel memory layout:\n"
- " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
- " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
-#ifdef CONFIG_HIGHMEM
- " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
-#endif
- " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
- " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
- " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
- " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
- " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
- FIXADDR_START, FIXADDR_TOP,
- (FIXADDR_TOP - FIXADDR_START) >> 10,
-
- CPU_ENTRY_AREA_BASE,
- CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
- CPU_ENTRY_AREA_MAP_SIZE >> 10,
-
-#ifdef CONFIG_HIGHMEM
- PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
- (LAST_PKMAP*PAGE_SIZE) >> 10,
-#endif
-
- VMALLOC_START, VMALLOC_END,
- (VMALLOC_END - VMALLOC_START) >> 20,
-
- (unsigned long)__va(0), (unsigned long)high_memory,
- ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
-
- (unsigned long)&__init_begin, (unsigned long)&__init_end,
- ((unsigned long)&__init_end -
- (unsigned long)&__init_begin) >> 10,
-
- (unsigned long)&_etext, (unsigned long)&_edata,
- ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
-
- (unsigned long)&_text, (unsigned long)&_etext,
- ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
/*
* Check boundaries twice: Some fundamental inconsistencies can
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index abbdecb75fad..0a14711d3a93 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -818,8 +818,7 @@ void __init paging_init(void)
* will not set it back.
*/
node_clear_state(0, N_MEMORY);
- if (N_MEMORY != N_NORMAL_MEMORY)
- node_clear_state(0, N_NORMAL_MEMORY);
+ node_clear_state(0, N_NORMAL_MEMORY);
zone_sizes_init();
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 44e4beb4239f..18c637c0dc6f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,6 +106,22 @@ static unsigned int __ioremap_check_encrypted(struct resource *res)
return 0;
}
+/*
+ * The EFI runtime services data area is not covered by walk_mem_res(), but must
+ * be mapped encrypted when SEV is active.
+ */
+static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
+{
+ if (!sev_active())
+ return;
+
+ if (!IS_ENABLED(CONFIG_EFI))
+ return;
+
+ if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+ desc->flags |= IORES_MAP_ENCRYPTED;
+}
+
static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
struct ioremap_desc *desc = arg;
@@ -124,6 +140,9 @@ static int __ioremap_collect_map_flags(struct resource *res, void *arg)
* To avoid multiple resource walks, this function walks resources marked as
* IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
* resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
+ *
+ * After that, deal with misc other ranges in __ioremap_check_other() which do
+ * not fall into the above category.
*/
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
struct ioremap_desc *desc)
@@ -135,6 +154,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
memset(desc, 0, sizeof(struct ioremap_desc));
walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
+
+ __ioremap_check_other(addr, desc);
}
/*
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 49d7814b59a9..9994353fb75d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -260,7 +260,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
goto no_kmmio;
}
- ctx = &get_cpu_var(kmmio_ctx);
+ ctx = this_cpu_ptr(&kmmio_ctx);
if (ctx->active) {
if (page_base == ctx->addr) {
/*
@@ -285,7 +285,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
disarm_kmmio_fault_page(faultpage);
}
- goto no_kmmio_ctx;
+ goto no_kmmio;
}
ctx->active++;
@@ -314,11 +314,8 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
* the user should drop to single cpu before tracing.
*/
- put_cpu_var(kmmio_ctx);
return 1; /* fault handled */
-no_kmmio_ctx:
- put_cpu_var(kmmio_ctx);
no_kmmio:
rcu_read_unlock();
preempt_enable_no_resched();
@@ -333,7 +330,7 @@ no_kmmio:
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
int ret = 0;
- struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+ struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);
if (!ctx->active) {
/*
@@ -371,7 +368,6 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
if (!(regs->flags & X86_EFLAGS_TF))
ret = 1;
out:
- put_cpu_var(kmmio_ctx);
return ret;
}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 673de6063345..109325d77b3e 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -386,7 +386,7 @@ static void enter_uniprocessor(void)
put_online_cpus();
for_each_cpu(cpu, downed_cpus) {
- err = cpu_down(cpu);
+ err = remove_cpu(cpu);
if (!err)
pr_info("CPU%d is down.\n", cpu);
else
@@ -406,7 +406,7 @@ static void leave_uniprocessor(void)
return;
pr_notice("Re-enabling CPUs...\n");
for_each_cpu(cpu, downed_cpus) {
- err = cpu_up(cpu);
+ err = add_cpu(cpu);
if (!err)
pr_info("enabled CPU%d.\n", cpu);
else
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 7f1d2034df1e..c5174b4e318b 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -324,7 +324,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
0, NULL, NUMA_NO_NODE);
}
-int __init setup_emu2phys_nid(int *dfl_phys_nid)
+static int __init setup_emu2phys_nid(int *dfl_phys_nid)
{
int i, max_emu_nid = 0;
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index c4aedd00c1ba..6d5424069e2b 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -15,6 +15,7 @@
#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
+#include <linux/libnvdimm.h>
#include <asm/e820/api.h>
#include <asm/processor.h>
@@ -304,11 +305,13 @@ void clflush_cache_range(void *vaddr, unsigned int size)
}
EXPORT_SYMBOL_GPL(clflush_cache_range);
+#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_invalidate_pmem(void *addr, size_t size)
{
clflush_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+#endif
static void __cpa_flush_all(void *arg)
{
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 44a9f068eee0..843aa10a4cb6 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -39,6 +39,7 @@
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/sections.h>
+#include <asm/set_memory.h>
#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
@@ -555,13 +556,6 @@ static inline bool pti_kernel_image_global_ok(void)
}
/*
- * This is the only user for these and it is not arch-generic
- * like the other set_memory.h functions. Just extern them.
- */
-extern int set_memory_nonglobal(unsigned long addr, int numpages);
-extern int set_memory_global(unsigned long addr, int numpages);
-
-/*
* For some configurations, map all of kernel text into the user page
* tables. This reduces TLB misses, especially on non-PCID systems.
*/
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 393d251798c0..4d2a7a764602 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2039,10 +2039,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
/* and dreg_lo,sreg_lo */
EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
- /* and dreg_hi,sreg_hi */
- EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
- /* or dreg_lo,dreg_hi */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ if (is_jmp64) {
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ }
goto emit_cond_jmp;
}
case BPF_JMP | BPF_JSET | BPF_K:
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index ee6b0780bea1..f8ed5f66cd20 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -117,17 +117,16 @@ static void punit_dbgfs_unregister(void)
debugfs_remove_recursive(punit_dbg_file);
}
-#define ICPU(model, drv_data) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\
- (kernel_ulong_t)&drv_data }
+#define X86_MATCH(model, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
+ X86_FEATURE_MWAIT, data)
static const struct x86_cpu_id intel_punit_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt),
- ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng),
- ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht),
+ X86_MATCH(ATOM_SILVERMONT, &punit_device_byt),
+ X86_MATCH(ATOM_SILVERMONT_MID, &punit_device_tng),
+ X86_MATCH(ATOM_AIRMONT, &punit_device_cht),
{}
};
-
MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids);
static int __init punit_atom_debug_init(void)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ae923ee8e2b4..1aae5302501d 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,10 +54,16 @@
#include <asm/x86_init.h>
#include <asm/uv/uv.h>
-static efi_system_table_t efi_systab __initdata;
-static u64 efi_systab_phys __initdata;
+static unsigned long efi_systab_phys __initdata;
+static unsigned long prop_phys = EFI_INVALID_TABLE_ADDR;
+static unsigned long uga_phys = EFI_INVALID_TABLE_ADDR;
+static unsigned long efi_runtime, efi_nr_tables;
-static efi_config_table_type_t arch_tables[] __initdata = {
+unsigned long efi_fw_vendor, efi_config_table;
+
+static const efi_config_table_type_t arch_tables[] __initconst = {
+ {EFI_PROPERTIES_TABLE_GUID, "PROP", &prop_phys},
+ {UGA_IO_PROTOCOL_GUID, "UGA", &uga_phys},
#ifdef CONFIG_X86_UV
{UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys},
#endif
@@ -65,26 +71,26 @@ static efi_config_table_type_t arch_tables[] __initdata = {
};
static const unsigned long * const efi_tables[] = {
- &efi.mps,
&efi.acpi,
&efi.acpi20,
&efi.smbios,
&efi.smbios3,
- &efi.boot_info,
- &efi.hcdp,
- &efi.uga,
+ &uga_phys,
#ifdef CONFIG_X86_UV
&uv_systab_phys,
#endif
- &efi.fw_vendor,
- &efi.runtime,
- &efi.config_table,
+ &efi_fw_vendor,
+ &efi_runtime,
+ &efi_config_table,
&efi.esrt,
- &efi.properties_table,
- &efi.mem_attr_table,
+ &prop_phys,
+ &efi_mem_attr_table,
#ifdef CONFIG_EFI_RCI2_TABLE
&rci2_table_phys,
#endif
+ &efi.tpm_log,
+ &efi.tpm_final_log,
+ &efi_rng_seed,
};
u64 efi_setup; /* efi setup_data physical address */
@@ -214,16 +220,13 @@ int __init efi_memblock_x86_reserve_range(void)
if (efi_enabled(EFI_PARAVIRT))
return 0;
-#ifdef CONFIG_X86_32
- /* Can't handle data above 4GB at this time */
- if (e->efi_memmap_hi) {
+ /* Can't handle firmware tables above 4GB on i386 */
+ if (IS_ENABLED(CONFIG_X86_32) && e->efi_memmap_hi > 0) {
pr_err("Memory map is above 4GB, disabling EFI.\n");
return -EINVAL;
}
- pmap = e->efi_memmap;
-#else
- pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
-#endif
+ pmap = (phys_addr_t)(e->efi_memmap | ((u64)e->efi_memmap_hi << 32));
+
data.phys_map = pmap;
data.size = e->efi_memmap_size;
data.desc_size = e->efi_memdesc_size;
@@ -243,6 +246,7 @@ int __init efi_memblock_x86_reserve_range(void)
efi.memmap.desc_version);
memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
+ set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags);
return 0;
}
@@ -305,11 +309,11 @@ static void __init efi_clean_memmap(void)
if (n_removal > 0) {
struct efi_memory_map_data data = {
- .phys_map = efi.memmap.phys_map,
- .desc_version = efi.memmap.desc_version,
- .desc_size = efi.memmap.desc_size,
- .size = efi.memmap.desc_size * (efi.memmap.nr_map - n_removal),
- .flags = 0,
+ .phys_map = efi.memmap.phys_map,
+ .desc_version = efi.memmap.desc_version,
+ .desc_size = efi.memmap.desc_size,
+ .size = efi.memmap.desc_size * (efi.memmap.nr_map - n_removal),
+ .flags = 0,
};
pr_warn("Removing %d invalid memory map entries.\n", n_removal);
@@ -333,43 +337,32 @@ void __init efi_print_memmap(void)
}
}
-static int __init efi_systab_init(u64 phys)
+static int __init efi_systab_init(unsigned long phys)
{
int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t)
: sizeof(efi_system_table_32_t);
+ const efi_table_hdr_t *hdr;
bool over4g = false;
void *p;
+ int ret;
- p = early_memremap_ro(phys, size);
+ hdr = p = early_memremap_ro(phys, size);
if (p == NULL) {
pr_err("Couldn't map the system table!\n");
return -ENOMEM;
}
+ ret = efi_systab_check_header(hdr, 1);
+ if (ret) {
+ early_memunmap(p, size);
+ return ret;
+ }
+
if (efi_enabled(EFI_64BIT)) {
const efi_system_table_64_t *systab64 = p;
- efi_systab.hdr = systab64->hdr;
- efi_systab.fw_vendor = systab64->fw_vendor;
- efi_systab.fw_revision = systab64->fw_revision;
- efi_systab.con_in_handle = systab64->con_in_handle;
- efi_systab.con_in = systab64->con_in;
- efi_systab.con_out_handle = systab64->con_out_handle;
- efi_systab.con_out = (void *)(unsigned long)systab64->con_out;
- efi_systab.stderr_handle = systab64->stderr_handle;
- efi_systab.stderr = systab64->stderr;
- efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
- efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
- efi_systab.nr_tables = systab64->nr_tables;
- efi_systab.tables = systab64->tables;
-
- over4g = systab64->con_in_handle > U32_MAX ||
- systab64->con_in > U32_MAX ||
- systab64->con_out_handle > U32_MAX ||
- systab64->con_out > U32_MAX ||
- systab64->stderr_handle > U32_MAX ||
- systab64->stderr > U32_MAX ||
- systab64->boottime > U32_MAX;
+ efi_runtime = systab64->runtime;
+ over4g = systab64->runtime > U32_MAX;
if (efi_setup) {
struct efi_setup_data *data;
@@ -380,38 +373,33 @@ static int __init efi_systab_init(u64 phys)
return -ENOMEM;
}
- efi_systab.fw_vendor = (unsigned long)data->fw_vendor;
- efi_systab.runtime = (void *)(unsigned long)data->runtime;
- efi_systab.tables = (unsigned long)data->tables;
+ efi_fw_vendor = (unsigned long)data->fw_vendor;
+ efi_config_table = (unsigned long)data->tables;
over4g |= data->fw_vendor > U32_MAX ||
- data->runtime > U32_MAX ||
data->tables > U32_MAX;
early_memunmap(data, sizeof(*data));
} else {
+ efi_fw_vendor = systab64->fw_vendor;
+ efi_config_table = systab64->tables;
+
over4g |= systab64->fw_vendor > U32_MAX ||
- systab64->runtime > U32_MAX ||
systab64->tables > U32_MAX;
}
+ efi_nr_tables = systab64->nr_tables;
} else {
const efi_system_table_32_t *systab32 = p;
- efi_systab.hdr = systab32->hdr;
- efi_systab.fw_vendor = systab32->fw_vendor;
- efi_systab.fw_revision = systab32->fw_revision;
- efi_systab.con_in_handle = systab32->con_in_handle;
- efi_systab.con_in = systab32->con_in;
- efi_systab.con_out_handle = systab32->con_out_handle;
- efi_systab.con_out = (void *)(unsigned long)systab32->con_out;
- efi_systab.stderr_handle = systab32->stderr_handle;
- efi_systab.stderr = systab32->stderr;
- efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
- efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
- efi_systab.nr_tables = systab32->nr_tables;
- efi_systab.tables = systab32->tables;
+ efi_fw_vendor = systab32->fw_vendor;
+ efi_runtime = systab32->runtime;
+ efi_config_table = systab32->tables;
+ efi_nr_tables = systab32->nr_tables;
}
+ efi.runtime_version = hdr->revision;
+
+ efi_systab_report_header(hdr, efi_fw_vendor);
early_memunmap(p, size);
if (IS_ENABLED(CONFIG_X86_32) && over4g) {
@@ -419,29 +407,40 @@ static int __init efi_systab_init(u64 phys)
return -EINVAL;
}
- efi.systab = &efi_systab;
+ return 0;
+}
+
+static int __init efi_config_init(const efi_config_table_type_t *arch_tables)
+{
+ void *config_tables;
+ int sz, ret;
+
+ if (efi_nr_tables == 0)
+ return 0;
+
+ if (efi_enabled(EFI_64BIT))
+ sz = sizeof(efi_config_table_64_t);
+ else
+ sz = sizeof(efi_config_table_32_t);
/*
- * Verify the EFI Table
+ * Let's see what config tables the firmware passed to us.
*/
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
- pr_err("System table signature incorrect!\n");
- return -EINVAL;
+ config_tables = early_memremap(efi_config_table, efi_nr_tables * sz);
+ if (config_tables == NULL) {
+ pr_err("Could not map Configuration table!\n");
+ return -ENOMEM;
}
- if ((efi.systab->hdr.revision >> 16) == 0)
- pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff);
- return 0;
+ ret = efi_config_parse_tables(config_tables, efi_nr_tables,
+ arch_tables);
+
+ early_memunmap(config_tables, efi_nr_tables * sz);
+ return ret;
}
void __init efi_init(void)
{
- efi_char16_t *c16;
- char vendor[100] = "unknown";
- int i = 0;
-
if (IS_ENABLED(CONFIG_X86_32) &&
(boot_params.efi_info.efi_systab_hi ||
boot_params.efi_info.efi_memmap_hi)) {
@@ -455,29 +454,7 @@ void __init efi_init(void)
if (efi_systab_init(efi_systab_phys))
return;
- efi.config_table = (unsigned long)efi.systab->tables;
- efi.fw_vendor = (unsigned long)efi.systab->fw_vendor;
- efi.runtime = (unsigned long)efi.systab->runtime;
-
- /*
- * Show what we know for posterity
- */
- c16 = early_memremap_ro(efi.systab->fw_vendor,
- sizeof(vendor) * sizeof(efi_char16_t));
- if (c16) {
- for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
- vendor[i] = c16[i];
- vendor[i] = '\0';
- early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
- } else {
- pr_err("Could not map the firmware vendor!\n");
- }
-
- pr_info("EFI v%u.%.02u by %s\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
-
- if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
+ if (efi_reuse_config(efi_config_table, efi_nr_tables))
return;
if (efi_config_init(arch_tables))
@@ -496,6 +473,22 @@ void __init efi_init(void)
return;
}
+ /* Parse the EFI Properties table if it exists */
+ if (prop_phys != EFI_INVALID_TABLE_ADDR) {
+ efi_properties_table_t *tbl;
+
+ tbl = early_memremap_ro(prop_phys, sizeof(*tbl));
+ if (tbl == NULL) {
+ pr_err("Could not map Properties table!\n");
+ } else {
+ if (tbl->memory_protection_attribute &
+ EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA)
+ set_bit(EFI_NX_PE_DATA, &efi.flags);
+
+ early_memunmap(tbl, sizeof(*tbl));
+ }
+ }
+
set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
efi_clean_memmap();
@@ -602,20 +595,6 @@ static void __init efi_merge_regions(void)
}
}
-static void __init get_systab_virt_addr(efi_memory_desc_t *md)
-{
- unsigned long size;
- u64 end, systab;
-
- size = md->num_pages << EFI_PAGE_SHIFT;
- end = md->phys_addr + size;
- systab = efi_systab_phys;
- if (md->phys_addr <= systab && systab < end) {
- systab += md->virt_addr - md->phys_addr;
- efi.systab = (efi_system_table_t *)(unsigned long)systab;
- }
-}
-
static void *realloc_pages(void *old_memmap, int old_shift)
{
void *ret;
@@ -771,7 +750,6 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
continue;
efi_map_region(md);
- get_systab_virt_addr(md);
if (left < desc_size) {
new_memmap = realloc_pages(new_memmap, *pg_shift);
@@ -797,8 +775,6 @@ static void __init kexec_enter_virtual_mode(void)
efi_memory_desc_t *md;
unsigned int num_pages;
- efi.systab = NULL;
-
/*
* We don't do virtual mode, since we don't do runtime services, on
* non-native EFI. With the UV1 memmap, we don't do runtime services in
@@ -821,10 +797,8 @@ static void __init kexec_enter_virtual_mode(void)
* Map efi regions which were passed via setup_data. The virt_addr is a
* fixed addr which was used in first kernel of a kexec boot.
*/
- for_each_efi_memory_desc(md) {
+ for_each_efi_memory_desc(md)
efi_map_region_fixed(md); /* FIXME: add error handling */
- get_systab_virt_addr(md);
- }
/*
* Unregister the early EFI memmap from efi_init() and install
@@ -839,8 +813,6 @@ static void __init kexec_enter_virtual_mode(void)
return;
}
- BUG_ON(!efi.systab);
-
num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
num_pages >>= PAGE_SHIFT;
@@ -850,15 +822,6 @@ static void __init kexec_enter_virtual_mode(void)
}
efi_sync_low_kernel_mappings();
-
- /*
- * Now that EFI is in virtual mode, update the function
- * pointers in the runtime service table to the new virtual addresses.
- *
- * Call EFI services through wrapper functions.
- */
- efi.runtime_version = efi_systab.hdr.revision;
-
efi_native_runtime_setup();
#endif
}
@@ -892,8 +855,6 @@ static void __init __efi_enter_virtual_mode(void)
efi_status_t status;
unsigned long pa;
- efi.systab = NULL;
-
if (efi_alloc_page_tables()) {
pr_err("Failed to allocate EFI page tables\n");
goto err;
@@ -925,9 +886,6 @@ static void __init __efi_enter_virtual_mode(void)
efi_print_memmap();
}
- if (WARN_ON(!efi.systab))
- goto err;
-
if (efi_setup_page_tables(pa, 1 << pg_shift))
goto err;
@@ -936,23 +894,17 @@ static void __init __efi_enter_virtual_mode(void)
status = efi_set_virtual_address_map(efi.memmap.desc_size * count,
efi.memmap.desc_size,
efi.memmap.desc_version,
- (efi_memory_desc_t *)pa);
+ (efi_memory_desc_t *)pa,
+ efi_systab_phys);
if (status != EFI_SUCCESS) {
pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n",
status);
goto err;
}
+ efi_check_for_embedded_firmwares();
efi_free_boot_services();
- /*
- * Now that EFI is in virtual mode, update the function
- * pointers in the runtime service table to the new virtual addresses.
- *
- * Call EFI services through wrapper functions.
- */
- efi.runtime_version = efi_systab.hdr.revision;
-
if (!efi_is_mixed())
efi_native_runtime_setup();
else
@@ -978,6 +930,8 @@ void __init efi_enter_virtual_mode(void)
if (efi_enabled(EFI_PARAVIRT))
return;
+ efi.runtime = (efi_runtime_services_t *)efi_runtime;
+
if (efi_setup)
kexec_enter_virtual_mode();
else
@@ -999,3 +953,43 @@ bool efi_is_table_address(unsigned long phys_addr)
return false;
}
+
+char *efi_systab_show_arch(char *str)
+{
+ if (uga_phys != EFI_INVALID_TABLE_ADDR)
+ str += sprintf(str, "UGA=0x%lx\n", uga_phys);
+ return str;
+}
+
+#define EFI_FIELD(var) efi_ ## var
+
+#define EFI_ATTR_SHOW(name) \
+static ssize_t name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \
+}
+
+EFI_ATTR_SHOW(fw_vendor);
+EFI_ATTR_SHOW(runtime);
+EFI_ATTR_SHOW(config_table);
+
+struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor);
+struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime);
+struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table);
+
+umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n)
+{
+ if (attr == &efi_attr_fw_vendor.attr) {
+ if (efi_enabled(EFI_PARAVIRT) ||
+ efi_fw_vendor == EFI_INVALID_TABLE_ADDR)
+ return 0;
+ } else if (attr == &efi_attr_runtime.attr) {
+ if (efi_runtime == EFI_INVALID_TABLE_ADDR)
+ return 0;
+ } else if (attr == &efi_attr_config_table.attr) {
+ if (efi_config_table == EFI_INVALID_TABLE_ADDR)
+ return 0;
+ }
+ return attr->mode;
+}
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 081d466002c9..c049c432745d 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -66,14 +66,16 @@ void __init efi_map_region(efi_memory_desc_t *md)
void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
-efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *,
- u32, u32, u32, void *);
+efi_status_t efi_call_svam(efi_runtime_services_t * const *,
+ u32, u32, u32, void *, u32);
efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
+ efi_memory_desc_t *virtual_map,
+ unsigned long systab_phys)
{
+ const efi_system_table_t *systab = (efi_system_table_t *)systab_phys;
struct desc_ptr gdt_descr;
efi_status_t status;
unsigned long flags;
@@ -90,9 +92,10 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size,
/* Disable interrupts around EFI calls: */
local_irq_save(flags);
- status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map,
+ status = efi_call_svam(&systab->runtime,
memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
+ descriptor_version, virtual_map,
+ __pa(&efi.runtime));
local_irq_restore(flags);
load_fixmap_gdt(0);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index fa8506e76bbe..211bb9358b73 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -180,7 +180,7 @@ void efi_sync_low_kernel_mappings(void)
static inline phys_addr_t
virt_to_phys_or_null_size(void *va, unsigned long size)
{
- bool bad_size;
+ phys_addr_t pa;
if (!va)
return 0;
@@ -188,16 +188,13 @@ virt_to_phys_or_null_size(void *va, unsigned long size)
if (virt_addr_valid(va))
return virt_to_phys(va);
- /*
- * A fully aligned variable on the stack is guaranteed not to
- * cross a page bounary. Try to catch strings on the stack by
- * checking that 'size' is a power of two.
- */
- bad_size = size > PAGE_SIZE || !is_power_of_2(size);
+ pa = slow_virt_to_phys(va);
- WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size);
+ /* check if the object crosses a page boundary */
+ if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK))
+ return 0;
- return slow_virt_to_phys(va);
+ return pa;
}
#define virt_to_phys_or_null(addr) \
@@ -500,12 +497,9 @@ static DEFINE_SPINLOCK(efi_runtime_lock);
*/
#define __efi_thunk(func, ...) \
({ \
- efi_runtime_services_32_t *__rt; \
unsigned short __ds, __es; \
efi_status_t ____s; \
\
- __rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime; \
- \
savesegment(ds, __ds); \
savesegment(es, __es); \
\
@@ -513,7 +507,7 @@ static DEFINE_SPINLOCK(efi_runtime_lock);
loadsegment(ds, __KERNEL_DS); \
loadsegment(es, __KERNEL_DS); \
\
- ____s = efi64_thunk(__rt->func, __VA_ARGS__); \
+ ____s = efi64_thunk(efi.runtime->mixed_mode.func, __VA_ARGS__); \
\
loadsegment(ds, __ds); \
loadsegment(es, __es); \
@@ -568,85 +562,25 @@ efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
- efi_status_t status;
- u32 phys_tm, phys_tc;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
- phys_tc = virt_to_phys_or_null(tc);
-
- status = efi_thunk(get_time, phys_tm, phys_tc);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t efi_thunk_set_time(efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(set_time, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_enabled, phys_pending, phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_enabled = virt_to_phys_or_null(enabled);
- phys_pending = virt_to_phys_or_null(pending);
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(get_wakeup_time, phys_enabled,
- phys_pending, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(set_wakeup_time, enabled, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static unsigned long efi_name_size(efi_char16_t *name)
@@ -658,6 +592,8 @@ static efi_status_t
efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 *attr, unsigned long *data_size, void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name, phys_vendor, phys_attr;
u32 phys_data_size, phys_data;
@@ -665,14 +601,19 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_data_size = virt_to_phys_or_null(data_size);
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_attr = virt_to_phys_or_null(attr);
phys_data = virt_to_phys_or_null_size(data, *data_size);
- status = efi_thunk(get_variable, phys_name, phys_vendor,
- phys_attr, phys_data_size, phys_data);
+ if (!phys_name || (data && !phys_data))
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_variable, phys_name, phys_vendor,
+ phys_attr, phys_data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -683,19 +624,25 @@ static efi_status_t
efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size, void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
- /* If data_size is > sizeof(u32) we've got problems */
- status = efi_thunk(set_variable, phys_name, phys_vendor,
- attr, data_size, phys_data);
+ if (!phys_name || !phys_data)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -707,6 +654,8 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size,
void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
@@ -714,13 +663,17 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
return EFI_NOT_READY;
+ *vnd = *vendor;
+
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
- /* If data_size is > sizeof(u32) we've got problems */
- status = efi_thunk(set_variable, phys_name, phys_vendor,
- attr, data_size, phys_data);
+ if (!phys_name || !phys_data)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -732,39 +685,36 @@ efi_thunk_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name_size, phys_name, phys_vendor;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_name_size = virt_to_phys_or_null(name_size);
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, *name_size);
- status = efi_thunk(get_next_variable, phys_name_size,
- phys_name, phys_vendor);
+ if (!phys_name)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_next_variable, phys_name_size,
+ phys_name, phys_vendor);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ *vendor = *vnd;
return status;
}
static efi_status_t
efi_thunk_get_next_high_mono_count(u32 *count)
{
- efi_status_t status;
- u32 phys_count;
- unsigned long flags;
-
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_count = virt_to_phys_or_null(count);
- status = efi_thunk(get_next_high_mono_count, phys_count);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static void
@@ -886,8 +836,10 @@ efi_status_t __init __no_sanitize_address
efi_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
+ efi_memory_desc_t *virtual_map,
+ unsigned long systab_phys)
{
+ const efi_system_table_t *systab = (efi_system_table_t *)systab_phys;
efi_status_t status;
unsigned long flags;
pgd_t *save_pgd = NULL;
@@ -910,13 +862,16 @@ efi_set_virtual_address_map(unsigned long memory_map_size,
/* Disable interrupts around EFI calls: */
local_irq_save(flags);
- status = efi_call(efi.systab->runtime->set_virtual_address_map,
+ status = efi_call(efi.runtime->set_virtual_address_map,
memory_map_size, descriptor_size,
descriptor_version, virtual_map);
local_irq_restore(flags);
kernel_fpu_end();
+ /* grab the virtually remapped EFI runtime services table pointer */
+ efi.runtime = READ_ONCE(systab->runtime);
+
if (save_pgd)
efi_uv1_memmap_phys_epilog(save_pgd);
else
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index 75c46e7a809f..09ec84f6ef51 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -8,14 +8,20 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/asm-offsets.h>
#include <asm/page_types.h>
__INIT
SYM_FUNC_START(efi_call_svam)
- push 8(%esp)
- push 8(%esp)
+ push %ebp
+ movl %esp, %ebp
+ push %ebx
+
+ push 16(%esp)
+ push 16(%esp)
push %ecx
push %edx
+ movl %eax, %ebx // &systab_phys->runtime
/*
* Switch to the flat mapped alias of this routine, by jumping to the
@@ -35,15 +41,20 @@ SYM_FUNC_START(efi_call_svam)
subl $__PAGE_OFFSET, %esp
/* call the EFI routine */
- call *(%eax)
+ movl (%eax), %eax
+ call *EFI_svam(%eax)
- /* convert ESP back to a kernel VA, and pop the outgoing args */
- addl $__PAGE_OFFSET + 16, %esp
+ /* grab the virtually remapped EFI runtime services table pointer */
+ movl (%ebx), %ecx
+ movl 36(%esp), %edx // &efi.runtime
+ movl %ecx, (%edx)
/* re-enable paging */
movl %cr0, %edx
orl $0x80000000, %edx
movl %edx, %cr0
+ movl 16(%esp), %ebx
+ leave
ret
SYM_FUNC_END(efi_call_svam)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 88d32c06cffa..a5a469cdf5bf 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -410,6 +410,10 @@ void __init efi_free_boot_services(void)
int num_entries = 0;
void *new, *new_md;
+ /* Keep all regions for /sys/kernel/debug/efi */
+ if (efi_enabled(EFI_DBG))
+ return;
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -537,7 +541,7 @@ int __init efi_reuse_config(u64 tables, int nr_tables)
goto out_memremap;
}
- for (i = 0; i < efi.systab->nr_tables; i++) {
+ for (i = 0; i < nr_tables; i++) {
efi_guid_t guid;
guid = ((efi_config_table_64_t *)p)->guid;
@@ -659,12 +663,9 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
return 1;
}
-#define ICPU(family, model, quirk_handler) \
- { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \
- (unsigned long)&quirk_handler }
-
static const struct x86_cpu_id efi_capsule_quirk_ids[] = {
- ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */
+ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000,
+ &qrk_capsule_setup_info),
{ }
};
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index e3f4bfc08f78..31dda18bb370 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,11 +60,8 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = {
.setup = tng_bt_sfi_setup,
};
-#define ICPU(model, ddata) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
-
static const struct x86_cpu_id bt_sfi_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &tng_bt_sfi_data),
{}
};
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index e9d97d52475e..0286fe1b14b5 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -569,7 +569,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
}
static const struct x86_cpu_id imr_ids[] __initconst = {
- { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
+ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL),
{}
};
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 4307830e1b6f..570e3062faac 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -105,7 +105,7 @@ static void __init imr_self_test(void)
}
static const struct x86_cpu_id imr_ids[] __initconst = {
- { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
+ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL),
{}
};
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index 9e2444500428..526f70f27c1c 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -265,7 +265,7 @@ static void iosf_mbi_reset_semaphore(void)
iosf_mbi_sem_address, 0, PUNIT_SEMAPHORE_BIT))
dev_err(&mbi_pdev->dev, "Error P-Unit semaphore reset failed\n");
- pm_qos_update_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE);
+ cpu_latency_qos_update_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE);
blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier,
MBI_PMIC_BUS_ACCESS_END, NULL);
@@ -301,8 +301,8 @@ static void iosf_mbi_reset_semaphore(void)
* 4) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC
* if this happens while the kernel itself is accessing the PMIC I2C bus
* the SoC hangs.
- * As the third step we call pm_qos_update_request() to disallow the CPU
- * to enter C6 or C7.
+ * As the third step we call cpu_latency_qos_update_request() to disallow the
+ * CPU to enter C6 or C7.
*
* 5) The P-Unit has a PMIC bus semaphore which we can request to stop
* autonomous P-Unit tasks from accessing the PMIC I2C bus while we hold it.
@@ -338,7 +338,7 @@ int iosf_mbi_block_punit_i2c_access(void)
* requires the P-Unit to talk to the PMIC and if this happens while
* we're holding the semaphore, the SoC hangs.
*/
- pm_qos_update_request(&iosf_mbi_pm_qos, 0);
+ cpu_latency_qos_update_request(&iosf_mbi_pm_qos, 0);
/* host driver writes to side band semaphore register */
ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
@@ -547,8 +547,7 @@ static int __init iosf_mbi_init(void)
{
iosf_debugfs_init();
- pm_qos_add_request(&iosf_mbi_pm_qos, PM_QOS_CPU_DMA_LATENCY,
- PM_QOS_DEFAULT_VALUE);
+ cpu_latency_qos_add_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE);
return pci_register_driver(&iosf_mbi_pci_driver);
}
@@ -561,7 +560,7 @@ static void __exit iosf_mbi_exit(void)
pci_dev_put(mbi_pdev);
mbi_pdev = NULL;
- pm_qos_remove_request(&iosf_mbi_pm_qos);
+ cpu_latency_qos_remove_request(&iosf_mbi_pm_qos);
}
module_init(iosf_mbi_init);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 915bb1639763..aaff9ed7ff45 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -475,20 +475,8 @@ static int msr_save_cpuid_features(const struct x86_cpu_id *c)
}
static const struct x86_cpu_id msr_save_cpu_table[] = {
- {
- .vendor = X86_VENDOR_AMD,
- .family = 0x15,
- .model = X86_MODEL_ANY,
- .feature = X86_FEATURE_ANY,
- .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
- },
- {
- .vendor = X86_VENDOR_AMD,
- .family = 0x16,
- .model = X86_MODEL_ANY,
- .feature = X86_FEATURE_ANY,
- .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
- },
+ X86_MATCH_VENDOR_FAM(AMD, 0x15, &msr_save_cpuid_features),
+ X86_MATCH_VENDOR_FAM(AMD, 0x16, &msr_save_cpuid_features),
{}
};
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 99b6332ba540..b11ec5d8f8ac 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -71,5 +71,6 @@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE
KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
-I$(srctree)/arch/x86/boot
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
GCOV_PROFILE := n
UBSAN_SANITIZE := n
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S
index 64d135d1ee63..63aa51875ba0 100644
--- a/arch/x86/realmode/rm/realmode.lds.S
+++ b/arch/x86/realmode/rm/realmode.lds.S
@@ -71,7 +71,6 @@ SECTIONS
/DISCARD/ : {
*(.note*)
*(.debug*)
- *(.eh_frame*)
}
#include "pasyms.h"
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index a8985e1f7432..95d26a69088b 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -27,11 +27,6 @@ config X86_64
def_bool 64BIT
select MODULES_USE_ELF_RELA
-config ARCH_DEFCONFIG
- string
- default "arch/um/configs/i386_defconfig" if X86_32
- default "arch/um/configs/x86_64_defconfig" if X86_64
-
config 3_LEVEL_PGTABLES
bool "Three-level pagetables" if !64BIT
default 64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 33c51c064c77..77f70b969d14 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,6 +21,7 @@ obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
+subarch-y += ../kernel/sys_ia32.o
else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 9649b5ad2ca2..2ed81e581755 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -7,7 +7,7 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
-#include <generated/user_constants.h>
+#include <asm/unistd.h>
#include <asm/syscall.h>
#define __NO_STUBS
@@ -26,11 +26,11 @@
#define old_mmap sys_old_mmap
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index c8bc7fb8cbd6..2e8544dafbb0 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -7,7 +7,7 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
-#include <generated/user_constants.h>
+#include <asm/unistd.h>
#include <asm/syscall.h>
#define __NO_STUBS
@@ -36,11 +36,14 @@
#define stub_execveat sys_execveat
#define stub_rt_sigreturn sys_rt_sigreturn
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_X32(nr, sym)
+#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
+
+#define __SYSCALL_64(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 5b37b7f952dd..c51dd8363d25 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -9,18 +9,6 @@
#include <linux/ptrace.h>
#include <asm/types.h>
-#ifdef __i386__
-#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
-static char syscalls[] = {
-#include <asm/syscalls_32.h>
-};
-#else
-#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
-static char syscalls[] = {
-#include <asm/syscalls_64.h>
-};
-#endif
-
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -94,7 +82,4 @@ void foo(void)
DEFINE(UM_PROT_READ, PROT_READ);
DEFINE(UM_PROT_WRITE, PROT_WRITE);
DEFINE(UM_PROT_EXEC, PROT_EXEC);
-
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
- DEFINE(NR_syscalls, sizeof(syscalls));
}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 1f756ffffe8b..507f4fb88fa7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,9 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+#include <asm/io_bitmap.h>
+#endif
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_update_io_bitmap(void)
+{
+ struct physdev_set_iobitmap iobitmap;
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
+ native_tss_update_io_bitmap();
+
+ iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
+ tss->x86_tss.io_bitmap_base;
+ if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
+ iobitmap.nr_ports = 0;
+ else
+ iobitmap.nr_ports = IO_BITMAP_BITS;
+
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+#endif
+
static void xen_io_delay(void)
{
}
@@ -896,14 +918,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
+#ifdef CONFIG_X86_64
+ unsigned int which;
+ u64 base;
+#endif
ret = 0;
switch (msr) {
#ifdef CONFIG_X86_64
- unsigned which;
- u64 base;
-
case MSR_FS_BASE: which = SEGBASE_FS; goto set;
case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
@@ -1046,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_idt_entry = xen_write_idt_entry,
.load_sp0 = xen_load_sp0,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .update_io_bitmap = xen_update_io_bitmap,
+#endif
.io_delay = xen_io_delay,
/* Xen takes care of %gs when switching to usermode for us */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7a43b2ae19f1..2097fa0ebdb5 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -132,7 +132,7 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
continue;
- rc = cpu_down(cpu);
+ rc = remove_cpu(cpu);
if (rc == 0) {
/*
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index befbdd8b17f0..c8897aad13cd 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -145,12 +145,19 @@ static struct notifier_block xen_pvclock_gtod_notifier = {
.notifier_call = xen_pvclock_gtod_notify,
};
+static int xen_cs_enable(struct clocksource *cs)
+{
+ vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
+ return 0;
+}
+
static struct clocksource xen_clocksource __read_mostly = {
- .name = "xen",
- .rating = 400,
- .read = xen_clocksource_get_cycles,
- .mask = ~0,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .name = "xen",
+ .rating = 400,
+ .read = xen_clocksource_get_cycles,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .enable = xen_cs_enable,
};
/*
@@ -412,12 +419,13 @@ void xen_restore_time_memory_area(void)
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
/*
- * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the
- * secondary time info with Xen or if we migrated to a host without the
- * necessary flags. On both of these cases what happens is either
- * process seeing a zeroed out pvti or seeing no PVCLOCK_TSC_STABLE_BIT
- * bit set. Userspace checks the latter and if 0, it discards the data
- * in pvti and fallbacks to a system call for a reliable timestamp.
+ * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to
+ * register the secondary time info with Xen or if we migrated to a
+ * host without the necessary flags. On both of these cases what
+ * happens is either process seeing a zeroed out pvti or seeing no
+ * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and
+ * if 0, it discards the data in pvti and fallbacks to a system
+ * call for a reliable timestamp.
*/
if (ret != 0)
pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
@@ -443,7 +451,7 @@ static void xen_setup_vsyscall_time_info(void)
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
if (ret) {
- pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret);
+ pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
free_page((unsigned long)ti);
return;
}
@@ -460,14 +468,14 @@ static void xen_setup_vsyscall_time_info(void)
if (!ret)
free_page((unsigned long)ti);
- pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n");
+ pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
return;
}
xen_clock = ti;
pvclock_set_pvti_cpu0_va(xen_clock);
- xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK;
+ xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
static void __init xen_time_init(void)