summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-08-10 15:36:23 +0300
committerIngo Molnar <mingo@kernel.org>2016-08-10 15:36:23 +0300
commitfdbdfefbabefcdf3f57560163b43fdc4cf95eb2f (patch)
tree1d0c420d4eaff48cf2486f10dded8d551241ee94 /arch/x86
parent6731b0d611a1274f9e785fa0189ac2aeeabd0591 (diff)
parenta0cba2179ea4c1820fce2ee046b6ed90ecc56196 (diff)
downloadlinux-fdbdfefbabefcdf3f57560163b43fdc4cf95eb2f.tar.xz
Merge branch 'linus' into timers/urgent, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/boot/Makefile2
-rw-r--r--arch/x86/entry/common.c6
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/vdso/Makefile3
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c25
-rw-r--r--arch/x86/entry/vdso/vdso2c.h6
-rw-r--r--arch/x86/events/amd/ibs.c3
-rw-r--r--arch/x86/events/amd/iommu.c2
-rw-r--r--arch/x86/events/core.c14
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/intel/uncore.h1
-rw-r--r--arch/x86/include/asm/dma-mapping.h5
-rw-r--r--arch/x86/include/asm/elf.h4
-rw-r--r--arch/x86/include/asm/fpu/internal.h16
-rw-r--r--arch/x86/include/asm/kvm_host.h31
-rw-r--r--arch/x86/include/asm/livepatch.h1
-rw-r--r--arch/x86/include/asm/mc146818rtc.h1
-rw-r--r--arch/x86/include/asm/mmu_context.h2
-rw-r--r--arch/x86/include/asm/pvclock.h39
-rw-r--r--arch/x86/include/asm/rtc.h1
-rw-r--r--arch/x86/include/asm/svm.h1
-rw-r--r--arch/x86/include/asm/swiotlb.h4
-rw-r--r--arch/x86/include/asm/syscall.h5
-rw-r--r--arch/x86/include/asm/thread_info.h71
-rw-r--r--arch/x86/include/asm/topology.h1
-rw-r--r--arch/x86/include/asm/uaccess.h26
-rw-r--r--arch/x86/include/asm/uaccess_32.h2
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/virtext.h8
-rw-r--r--arch/x86/include/asm/xen/page-coherent.h9
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/amd_gart_64.c21
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/apic.c4
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c1
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/ipi.c1
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/probe_64.c3
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c3
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/match.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c3
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c2
-rw-r--r--arch/x86/kernel/early-quirks.c404
-rw-r--r--arch/x86/kernel/fpu/signal.c12
-rw-r--r--arch/x86/kernel/hpet.c3
-rw-r--r--arch/x86/kernel/hw_breakpoint.c3
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c3
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/io_delay.c2
-rw-r--r--arch/x86/kernel/irq_32.c1
-rw-r--r--arch/x86/kernel/irq_64.c1
-rw-r--r--arch/x86/kernel/kdebugfs.c2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/mpparse.c1
-rw-r--r--arch/x86/kernel/nmi.c1
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c2
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/pci-calgary_64.c14
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/pci-nommu.c4
-rw-r--r--arch/x86/kernel/pci-swiotlb.c6
-rw-r--r--arch/x86/kernel/pmem.c2
-rw-r--r--arch/x86/kernel/process.c3
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/ptrace.c15
-rw-r--r--arch/x86/kernel/pvclock.c17
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/signal.c40
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/test_rodata.c5
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/i8254.c4
-rw-r--r--arch/x86/kvm/iommu.c4
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/irq.h3
-rw-r--r--arch/x86/kvm/irq_comm.c49
-rw-r--r--arch/x86/kvm/lapic.c544
-rw-r--r--arch/x86/kvm/lapic.h19
-rw-r--r--arch/x86/kvm/mmu.c32
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/paging_tmpl.h10
-rw-r--r--arch/x86/kvm/pmu_intel.c2
-rw-r--r--arch/x86/kvm/svm.c8
-rw-r--r--arch/x86/kvm/trace.h15
-rw-r--r--arch/x86/kvm/vmx.c401
-rw-r--r--arch/x86/kvm/x86.c178
-rw-r--r--arch/x86/lib/cache-smp.c2
-rw-r--r--arch/x86/lib/cpu.c3
-rw-r--r--arch/x86/lib/csum-partial_64.c2
-rw-r--r--arch/x86/lib/csum-wrappers_64.c2
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/hweight.S2
-rw-r--r--arch/x86/lib/memcpy_32.c2
-rw-r--r--arch/x86/lib/mmx_32.c2
-rw-r--r--arch/x86/lib/msr-reg-export.c2
-rw-r--r--arch/x86/lib/msr-smp.c2
-rw-r--r--arch/x86/lib/msr.c3
-rw-r--r--arch/x86/lib/string_32.c2
-rw-r--r--arch/x86/lib/usercopy.c2
-rw-r--r--arch/x86/lib/usercopy_32.c2
-rw-r--r--arch/x86/lib/usercopy_64.c2
-rw-r--r--arch/x86/mm/amdtopology.c1
-rw-r--r--arch/x86/mm/dump_pagetables.c6
-rw-r--r--arch/x86/mm/highmem_32.c2
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/init_64.c1
-rw-r--r--arch/x86/mm/iomap_32.c2
-rw-r--r--arch/x86/mm/ioremap.c1
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c1
-rw-r--r--arch/x86/mm/kmemcheck/shadow.c2
-rw-r--r--arch/x86/mm/kmmio.c2
-rw-r--r--arch/x86/mm/mmio-mod.c2
-rw-r--r--arch/x86/mm/numa.c1
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/pat.c1
-rw-r--r--arch/x86/mm/pat_rbtree.c1
-rw-r--r--arch/x86/mm/pf_in.c1
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/physaddr.c2
-rw-r--r--arch/x86/mm/srat.c2
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/pci/sta2x11-fixup.c2
-rw-r--r--arch/x86/pci/vmd.c57
-rw-r--r--arch/x86/pci/xen.c2
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/platform/efi/early_printk.c4
-rw-r--r--arch/x86/platform/efi/efi.c1
-rw-r--r--arch/x86/platform/efi/efi_64.c3
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c2
-rw-r--r--arch/x86/platform/intel-mid/intel_mid_vrtc.c1
-rw-r--r--arch/x86/platform/intel-mid/pwr.c4
-rw-r--r--arch/x86/platform/intel-mid/sfi.c2
-rw-r--r--arch/x86/platform/olpc/olpc.c2
-rw-r--r--arch/x86/platform/olpc/olpc_ofw.c5
-rw-r--r--arch/x86/platform/ts5500/ts5500.c6
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/platform/uv/uv_nmi.c2
-rw-r--r--arch/x86/power/hibernate_64.c18
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/purgatory/Makefile2
-rw-r--r--arch/x86/realmode/rm/Makefile2
-rw-r--r--arch/x86/um/delay.c2
-rw-r--r--arch/x86/um/vdso/Makefile3
-rw-r--r--arch/x86/xen/debugfs.c1
-rw-r--r--arch/x86/xen/enlighten.c7
-rw-r--r--arch/x86/xen/mmu.c3
-rw-r--r--arch/x86/xen/p2m.c2
-rw-r--r--arch/x86/xen/platform-pci-unplug.c2
-rw-r--r--arch/x86/xen/setup.c2
178 files changed, 1493 insertions, 954 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fa55851d2a9..c580d8c33562 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -80,6 +80,7 @@ config X86
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_AOUT if X86_32
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
@@ -91,6 +92,7 @@ config X86
select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_EBPF_JIT if X86_64
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
@@ -111,6 +113,7 @@ config X86
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_HW_BREAKPOINT
select HAVE_IDE
@@ -151,6 +154,7 @@ config X86
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select PERF_EVENTS
select RTC_LIB
+ select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index be8e688fa0d4..12ea8f8384f4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -96,7 +96,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
$(call if_changed,zoffset)
-AFLAGS_header.o += -I$(obj)
+AFLAGS_header.o += -I$(objtree)/$(obj)
$(obj)/header.o: $(obj)/zoffset.h
LDFLAGS_setup.elf := -T
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a1e71d431fed..1433f6b4607d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -204,8 +204,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
+ *
+ * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
+ * special case only applies after poking regs and before the
+ * very next return to user mode.
*/
- ti->status &= ~TS_COMPAT;
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4cddd17153fb..f848572169ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -294,7 +294,7 @@
# 285 sys_setaltroot
286 i386 add_key sys_add_key
287 i386 request_key sys_request_key
-288 i386 keyctl sys_keyctl
+288 i386 keyctl sys_keyctl compat_sys_keyctl
289 i386 ioprio_set sys_ioprio_set
290 i386 ioprio_get sys_ioprio_get
291 i386 inotify_init sys_inotify_init
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 6ba89a1ab0e5..d5409660f5de 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -75,7 +75,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
-$(vobjs): KBUILD_CFLAGS += $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -145,6 +145,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2f02d23a05ef..94d54d0defa7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
- u64 tsc, pvti_tsc;
- u64 last, delta, pvti_system_time;
- u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+ u64 last;
+ u32 version;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
@@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode)
*/
do {
- version = pvti->version;
-
- smp_rmb();
+ version = pvclock_read_begin(pvti);
if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
return 0;
}
- tsc = rdtsc_ordered();
- pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
- pvti_tsc_shift = pvti->tsc_shift;
- pvti_system_time = pvti->system_time;
- pvti_tsc = pvti->tsc_timestamp;
-
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while (unlikely((version & 1) || version != pvti->version));
-
- delta = tsc - pvti_tsc;
- ret = pvti_system_time +
- pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
- pvti_tsc_shift);
+ ret = __pvclock_read_cycles(pvti);
+ } while (pvclock_read_retry(pvti, version));
/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 63a03bb91497..4f741192846d 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -22,6 +22,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
+ if (hdr->e_type != ET_DYN)
+ fail("input is not a shared object\n");
+
/* Walk the segment table. */
for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
if (GET_LE(&pt[i].p_type) == PT_LOAD) {
@@ -49,6 +52,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
if (stripped_len < load_size)
fail("stripped input is too short\n");
+ if (!dyn)
+ fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n");
+
/* Walk the dynamic table */
for (i = 0; dyn + i < dyn_end &&
GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 155ea5324ae0..b26ee32f73e8 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -7,7 +7,8 @@
*/
#include <linux/perf_event.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 6011a573dd64..b28200dea715 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -12,7 +12,7 @@
*/
#include <linux/perf_event.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index c17f0de5fd39..d0efb5cb1b00 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -17,7 +17,8 @@
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/init.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
@@ -262,10 +263,13 @@ static bool check_hw_exists(void)
return true;
msr_fail:
- pr_cont("Broken PMU hardware detected, using software events only.\n");
- printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
- boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
- reg, val_new);
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ pr_cont("PMU not available due to virtualization, using software events only.\n");
+ } else {
+ pr_cont("Broken PMU hardware detected, using software events only.\n");
+ pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
+ reg, val_new);
+ }
return false;
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 3f3d0d67749b..463dc7a5a6c3 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,5 @@
+#include <linux/module.h>
+
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "uncore.h"
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index d6063e438158..78b9c23e2d8d 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -1,4 +1,3 @@
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <asm/apicdef.h>
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 3a27b93e6261..44461626830e 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -9,7 +9,6 @@
#include <linux/kmemcheck.h>
#include <linux/scatterlist.h>
#include <linux/dma-debug.h>
-#include <linux/dma-attrs.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
#include <linux/dma-contiguous.h>
@@ -48,11 +47,11 @@ extern int dma_supported(struct device *hwdev, u64 mask);
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
- struct dma_attrs *attrs);
+ unsigned long attrs);
extern void dma_generic_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
- struct dma_attrs *attrs);
+ unsigned long attrs);
#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index fea7724141a0..e7f155c3045e 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -344,8 +344,8 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
*/
static inline int mmap_is_ia32(void)
{
- return config_enabled(CONFIG_X86_32) ||
- (config_enabled(CONFIG_COMPAT) &&
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
test_thread_flag(TIF_ADDR32));
}
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 116b58347501..2737366ea583 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -137,9 +137,9 @@ static inline int copy_fregs_to_user(struct fregs_state __user *fx)
static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
- if (config_enabled(CONFIG_X86_32))
+ if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
/* See comment in copy_fxregs_to_kernel() below. */
@@ -150,10 +150,10 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
int err;
- if (config_enabled(CONFIG_X86_32)) {
+ if (IS_ENABLED(CONFIG_X86_32)) {
err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
- if (config_enabled(CONFIG_AS_FXSAVEQ)) {
+ if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
/* See comment in copy_fxregs_to_kernel() below. */
@@ -166,9 +166,9 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
- if (config_enabled(CONFIG_X86_32))
+ if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
/* See comment in copy_fxregs_to_kernel() below. */
@@ -190,9 +190,9 @@ static inline int copy_user_to_fregs(struct fregs_state __user *fx)
static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
- if (config_enabled(CONFIG_X86_32))
+ if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
else {
/* Using "rex64; fxsave %0" is broken because, if the memory
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e62862b622..33ae3a4d0159 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -35,8 +35,9 @@
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
-#define KVM_MAX_VCPUS 255
-#define KVM_SOFT_MAX_VCPUS 160
+#define KVM_MAX_VCPUS 288
+#define KVM_SOFT_MAX_VCPUS 240
+#define KVM_MAX_VCPU_ID 1023
#define KVM_USER_MEM_SLOTS 509
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 3
@@ -599,6 +600,7 @@ struct kvm_vcpu_arch {
u64 mcg_cap;
u64 mcg_status;
u64 mcg_ctl;
+ u64 mcg_ext_ctl;
u64 *mce_banks;
/* Cache MMIO info */
@@ -682,9 +684,12 @@ struct kvm_arch_memory_slot {
struct kvm_apic_map {
struct rcu_head rcu;
u8 mode;
- struct kvm_lapic *phys_map[256];
- /* first index is cluster id second is cpu id in a cluster */
- struct kvm_lapic *logical_map[16][16];
+ u32 max_apic_id;
+ union {
+ struct kvm_lapic *xapic_flat_map[8];
+ struct kvm_lapic *xapic_cluster_map[16][4];
+ };
+ struct kvm_lapic *phys_map[];
};
/* Hyper-V emulation context */
@@ -779,6 +784,9 @@ struct kvm_arch {
u32 ldr_mode;
struct page *avic_logical_id_table_page;
struct page *avic_physical_id_table_page;
+
+ bool x2apic_format;
+ bool x2apic_broadcast_quirk_disabled;
};
struct kvm_vm_stat {
@@ -1006,6 +1014,11 @@ struct kvm_x86_ops {
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
+
+ void (*setup_mce)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -1026,7 +1039,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask);
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -1077,6 +1090,10 @@ extern u32 kvm_max_guest_tsc_khz;
extern u8 kvm_tsc_scaling_ratio_frac_bits;
/* maximum allowed value of TSC scaling ratio */
extern u64 kvm_max_tsc_scaling_ratio;
+/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
+extern u64 kvm_default_tsc_scaling_ratio;
+
+extern u64 kvm_mce_cap_supported;
enum emulation_result {
EMULATE_DONE, /* no further processing */
@@ -1352,7 +1369,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq);
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index a7f9181f63f3..ed80003ce3e2 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -22,7 +22,6 @@
#define _ASM_X86_LIVEPATCH_H
#include <asm/setup.h>
-#include <linux/module.h>
#include <linux/ftrace.h>
static inline int klp_check_compiler_support(void)
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 0f555cc31984..24acd9ba7837 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -6,7 +6,6 @@
#include <asm/io.h>
#include <asm/processor.h>
-#include <linux/mc146818rtc.h>
#ifndef RTC_PORT
#define RTC_PORT(x) (0x70 + (x))
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 396348196aa7..d8abfcf524d1 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -155,7 +155,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
- return !config_enabled(CONFIG_IA32_EMULATION) ||
+ return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
!(mm->context.ia32_compat == TIF_IA32);
}
#else
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7c1c89598688..d019f0cc80ec 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -25,6 +25,24 @@ void pvclock_resume(void);
void pvclock_touch_watchdogs(void);
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+ unsigned version = src->version & ~1;
+ /* Make sure that the version is read before the data. */
+ virt_rmb();
+ return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+ unsigned version)
+{
+ /* Make sure that the version is re-read after the data. */
+ virt_rmb();
+ return unlikely(version != src->version);
+}
+
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
}
static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
- cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
{
- unsigned version;
- cycle_t offset;
- u64 delta;
-
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
- delta = rdtsc_ordered() - src->tsc_timestamp;
- offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
- src->tsc_shift);
- *cycles = src->system_time + offset;
- *flags = src->flags;
- return version;
+ u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+ cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+ return src->system_time + offset;
}
struct pvclock_vsyscall_time_info {
diff --git a/arch/x86/include/asm/rtc.h b/arch/x86/include/asm/rtc.h
deleted file mode 100644
index f71c3b0ed360..000000000000
--- a/arch/x86/include/asm/rtc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/rtc.h>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index d0fe23ec7e98..14824fc78f7e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -193,7 +193,6 @@ struct __attribute__ ((__packed__)) vmcb {
struct vmcb_save_area save;
};
-#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
#define SVM_VM_CR_SVM_DISABLE 4
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index ab05d73e2bb7..d2f69b9ff732 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -31,9 +31,9 @@ static inline void dma_mark_clean(void *addr, size_t size) {}
extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs);
+ unsigned long attrs);
extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
- struct dma_attrs *attrs);
+ unsigned long attrs);
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 999b7cd2e78c..4e23dd15c661 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -239,9 +239,6 @@ static inline int syscall_get_arch(void)
* TS_COMPAT is set for 32-bit syscall entry and then
* remains set until we return to user mode.
*
- * TIF_IA32 tasks should always have TS_COMPAT set at
- * system call time.
- *
* x32 tasks should be considered AUDIT_ARCH_X86_64.
*/
if (task_thread_info(current)->status & TS_COMPAT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89bff044a6f5..8b7c8d8e0852 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -176,6 +176,50 @@ static inline unsigned long current_stack_pointer(void)
return sp;
}
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ * 1 if within a frame
+ * -1 if placed across a frame boundary (or outside stack)
+ * 0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+ const void *frame = NULL;
+ const void *oldframe;
+
+ oldframe = __builtin_frame_address(1);
+ if (oldframe)
+ frame = __builtin_frame_address(2);
+ /*
+ * low ----------------------------------------------> high
+ * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+ * ^----------------^
+ * allow copies only within here
+ */
+ while (stack <= frame && frame < stackend) {
+ /*
+ * If obj + len extends past the last frame, this
+ * check won't pass and the next frame will be 0,
+ * causing us to bail out and correctly report
+ * the copy as invalid.
+ */
+ if (obj + len <= frame)
+ return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+ oldframe = frame;
+ frame = *(const void * const *)frame;
+ }
+ return -1;
+#else
+ return 0;
+#endif
+}
+
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
@@ -219,32 +263,11 @@ static inline unsigned long current_stack_pointer(void)
* have to worry about atomic accesses.
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+#endif
#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK 1
-static inline void set_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- ti->status |= TS_RESTORE_SIGMASK;
- WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
- return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- if (!(ti->status & TS_RESTORE_SIGMASK))
- return false;
- ti->status &= ~TS_RESTORE_SIGMASK;
- return true;
-}
static inline bool in_ia32_syscall(void)
{
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 43e87a3dd95c..cf75871d2f81 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -36,6 +36,7 @@
#include <linux/cpumask.h>
#include <asm/mpspec.h>
+#include <asm/percpu.h>
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c03bfb68c503..a0ae610b9280 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -761,9 +761,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
* case, and do only runtime checking for non-constant sizes.
*/
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(to, n, false);
n = _copy_from_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_from_user_overflow();
else
__copy_from_user_overflow(sz, n);
@@ -781,9 +782,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
might_fault();
/* See the comment in copy_from_user() above. */
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(from, n, true);
n = _copy_to_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_to_user_overflow();
else
__copy_to_user_overflow(sz, n);
@@ -812,21 +814,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
#define user_access_begin() __uaccess_begin()
#define user_access_end() __uaccess_end()
-#define unsafe_put_user(x, ptr) \
-({ \
+#define unsafe_put_user(x, ptr, err_label) \
+do { \
int __pu_err; \
__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \
- __builtin_expect(__pu_err, 0); \
-})
+ if (unlikely(__pu_err)) goto err_label; \
+} while (0)
-#define unsafe_get_user(x, ptr) \
-({ \
+#define unsafe_get_user(x, ptr, err_label) \
+do { \
int __gu_err; \
unsigned long __gu_val; \
__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- __builtin_expect(__gu_err, 0); \
-})
+ if (unlikely(__gu_err)) goto err_label; \
+} while (0)
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 4b32da24faaf..7d3bdd1ed697 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
static __always_inline unsigned long __must_check
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
+ check_object_size(from, n, true);
return __copy_to_user_ll(to, from, n);
}
@@ -95,6 +96,7 @@ static __always_inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_fault();
+ check_object_size(to, n, false);
if (__builtin_constant_p(n)) {
unsigned long ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 2eac2aa3e37f..673059a109fe 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
{
int ret = 0;
+ check_object_size(dst, size, false);
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
{
int ret = 0;
+ check_object_size(src, size, true);
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index cce9ee68e335..0116b2ee9e64 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -83,23 +83,19 @@ static inline void cpu_emergency_vmxoff(void)
*/
static inline int cpu_has_svm(const char **msg)
{
- uint32_t eax, ebx, ecx, edx;
-
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
if (msg)
*msg = "not amd";
return 0;
}
- cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
- if (eax < SVM_CPUID_FUNC) {
+ if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
if (msg)
*msg = "can't execute cpuid_8000000a";
return 0;
}
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+ if (!boot_cpu_has(X86_FEATURE_SVM)) {
if (msg)
*msg = "svm not available";
return 0;
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index acd844c017d3..f02f025ff988 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -2,12 +2,11 @@
#define _ASM_X86_XEN_PAGE_COHERENT_H
#include <asm/page.h>
-#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vstart = (void*)__get_free_pages(flags, get_order(size));
*dma_handle = virt_to_phys(vstart);
@@ -16,18 +15,18 @@ static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
free_pages((unsigned long) cpu_addr, get_order(size));
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
- enum dma_data_direction dir, struct dma_attrs *attrs) { }
+ enum dma_data_direction dir, unsigned long attrs) { }
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs) { }
+ unsigned long attrs) { }
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6738e5c82cca..90d84c3eee53 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -28,7 +28,7 @@
#include <linux/acpi_pmtmr.h>
#include <linux/efi.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/dmi.h>
#include <linux/irq.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 4b28159e0421..bdfad642123f 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -5,7 +5,7 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/cpu.h>
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 8e3842fc8bea..63ff468a7986 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -20,7 +20,6 @@
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
-#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitmap.h>
@@ -242,7 +241,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
static dma_addr_t gart_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long bus;
phys_addr_t paddr = page_to_phys(page) + offset;
@@ -264,7 +263,7 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page,
*/
static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long iommu_page;
int npages;
@@ -286,7 +285,7 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
* Wrapper for pci_unmap_single working with scatterlists.
*/
static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *s;
int i;
@@ -294,7 +293,7 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
for_each_sg(sg, s, nents, i) {
if (!s->dma_length || !s->length)
break;
- gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
+ gart_unmap_page(dev, s->dma_address, s->dma_length, dir, 0);
}
}
@@ -316,7 +315,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
addr = dma_map_area(dev, addr, s->length, dir, 0);
if (addr == bad_dma_addr) {
if (i > 0)
- gart_unmap_sg(dev, sg, i, dir, NULL);
+ gart_unmap_sg(dev, sg, i, dir, 0);
nents = 0;
sg[0].dma_length = 0;
break;
@@ -387,7 +386,7 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
* Merge chunks that have page aligned sizes into a continuous mapping.
*/
static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *s, *ps, *start_sg, *sgmap;
int need = 0, nextneed, i, out, start;
@@ -457,7 +456,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
error:
flush_gart();
- gart_unmap_sg(dev, sg, out, dir, NULL);
+ gart_unmap_sg(dev, sg, out, dir, 0);
/* When it was forced or merged try again in a dumb way */
if (force_iommu || iommu_merge) {
@@ -477,7 +476,7 @@ error:
/* allocate and map a coherent mapping */
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
- gfp_t flag, struct dma_attrs *attrs)
+ gfp_t flag, unsigned long attrs)
{
dma_addr_t paddr;
unsigned long align_mask;
@@ -509,9 +508,9 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, struct dma_attrs *attrs)
+ dma_addr_t dma_addr, unsigned long attrs)
{
- gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
+ gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index e45ec2b4e15e..4fdf6230d93c 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -9,7 +9,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/spinlock.h>
#include <asm/amd_nb.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0fd3d659f13c..cea4fc19e844 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -23,7 +23,7 @@
#include <linux/bootmem.h>
#include <linux/ftrace.h>
#include <linux/ioport.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/timex.h>
@@ -147,7 +147,7 @@ static int force_enable_local_apic __initdata;
*/
static int __init parse_lapic(char *arg)
{
- if (config_enabled(CONFIG_X86_32) && !arg)
+ if (IS_ENABLED(CONFIG_X86_32) && !arg)
force_enable_local_apic = 1;
else if (arg && !strncmp(arg, "notscdeadline", 13))
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 048747778d37..5b2ae106bd4a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -15,7 +15,7 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/hardirq.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 2cebf59092d8..c05688b2deff 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -11,7 +11,6 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 7788ce643bf4..f29501e1a5c1 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -16,7 +16,7 @@
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/delay.h>
#ifdef CONFIG_HARDLOCKUP_DETECTOR
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f072b9572634..7491f417a8e4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -39,7 +39,7 @@
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 2a0f225afebd..3a205d4a12d0 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -8,7 +8,6 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/cpu.h>
-#include <linux/module.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 93edfa01b408..7c43e716c158 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -8,7 +8,7 @@
*/
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1793dba7a741..c303054b90b5 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -11,10 +11,9 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
-#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/dmar.h>
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a5e400afc563..6066d945c40e 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -523,7 +523,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
struct apic_chip_data *data = irq_data->chip_data;
int err, irq = irq_data->irq;
- if (!config_enabled(CONFIG_SMP))
+ if (!IS_ENABLED(CONFIG_SMP))
return -EPERM;
if (!cpumask_intersects(dest, cpu_online_mask))
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 64dd38fbf218..09b59adaea3f 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -12,7 +12,7 @@
#include <linux/proc_fs.h>
#include <linux/threads.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/sched.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d22a7b9c4f0e..809eda03c527 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2,7 +2,7 @@
#include <linux/linkage.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/ctype.h>
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 73d391ae452f..27e46658ebe3 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -21,7 +21,8 @@
*
*/
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index abf601235b29..fcd484d2bb03 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -5,7 +5,7 @@
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index fbb5e90557a5..e42117d5f4d7 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,7 +1,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <linux/cpu.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/slab.h>
/**
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 10c11b4da31d..8f44c5a50ab8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -13,7 +13,8 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/clocksource.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/efi.h>
#include <linux/interrupt.h>
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 31e951ce6dff..3b442b64c72d 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -17,7 +17,6 @@
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 16e37a2581ac..fdc55215d44d 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -4,7 +4,7 @@
*/
#define DEBUG
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index d76f13d6d8d6..6d9b45549109 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -2,7 +2,6 @@
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
-#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7d393ecdeee6..28f1b54b7fad 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -38,7 +38,7 @@
#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 2e8caf03f593..181eabecae25 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -12,7 +12,7 @@
*/
#include <linux/percpu.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 8cac429b6a1d..1ff0598d309c 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,7 +22,8 @@
*/
#include <linux/dmi.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 9ef978d69c22..9616cf76940c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -20,7 +20,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 948d77da3881..09675712eba8 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -7,7 +7,7 @@
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6dede08dd98b..9ee4520ce83c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -7,7 +7,7 @@
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 57b71373bae3..de7501edb21c 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -237,36 +237,19 @@ static void __init intel_remapping_check(int num, int slot, int func)
* despite the efforts of the "RAM buffer" approach, which simply rounds
* memory boundaries up to 64M to try to catch space that may decode
* as RAM and so is not suitable for MMIO.
- *
- * And yes, so far on current devices the base addr is always under 4G.
*/
-static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_size)
-{
- u32 base;
-
- /*
- * For the PCI IDs in this quirk, the stolen base is always
- * in 0x5c, aka the BDSM register (yes that's really what
- * it's called).
- */
- base = read_pci_config(num, slot, func, 0x5c);
- base &= ~((1<<20) - 1);
-
- return base;
-}
#define KB(x) ((x) * 1024UL)
#define MB(x) (KB (KB (x)))
-#define GB(x) (MB (KB (x)))
static size_t __init i830_tseg_size(void)
{
- u8 tmp = read_pci_config_byte(0, 0, 0, I830_ESMRAMC);
+ u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC);
- if (!(tmp & TSEG_ENABLE))
+ if (!(esmramc & TSEG_ENABLE))
return 0;
- if (tmp & I830_TSEG_SIZE_1M)
+ if (esmramc & I830_TSEG_SIZE_1M)
return MB(1);
else
return KB(512);
@@ -274,27 +257,26 @@ static size_t __init i830_tseg_size(void)
static size_t __init i845_tseg_size(void)
{
- u8 tmp = read_pci_config_byte(0, 0, 0, I845_ESMRAMC);
+ u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC);
+ u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK;
- if (!(tmp & TSEG_ENABLE))
+ if (!(esmramc & TSEG_ENABLE))
return 0;
- switch (tmp & I845_TSEG_SIZE_MASK) {
- case I845_TSEG_SIZE_512K:
- return KB(512);
- case I845_TSEG_SIZE_1M:
- return MB(1);
+ switch (tseg_size) {
+ case I845_TSEG_SIZE_512K: return KB(512);
+ case I845_TSEG_SIZE_1M: return MB(1);
default:
- WARN_ON(1);
- return 0;
+ WARN(1, "Unknown ESMRAMC value: %x!\n", esmramc);
}
+ return 0;
}
static size_t __init i85x_tseg_size(void)
{
- u8 tmp = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC);
+ u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC);
- if (!(tmp & TSEG_ENABLE))
+ if (!(esmramc & TSEG_ENABLE))
return 0;
return MB(1);
@@ -314,285 +296,287 @@ static size_t __init i85x_mem_size(void)
* On 830/845/85x the stolen memory base isn't available in any
* register. We need to calculate it as TOM-TSEG_SIZE-stolen_size.
*/
-static u32 __init i830_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i830_stolen_base(int num, int slot, int func,
+ size_t stolen_size)
{
- return i830_mem_size() - i830_tseg_size() - stolen_size;
+ return (phys_addr_t)i830_mem_size() - i830_tseg_size() - stolen_size;
}
-static u32 __init i845_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i845_stolen_base(int num, int slot, int func,
+ size_t stolen_size)
{
- return i830_mem_size() - i845_tseg_size() - stolen_size;
+ return (phys_addr_t)i830_mem_size() - i845_tseg_size() - stolen_size;
}
-static u32 __init i85x_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i85x_stolen_base(int num, int slot, int func,
+ size_t stolen_size)
{
- return i85x_mem_size() - i85x_tseg_size() - stolen_size;
+ return (phys_addr_t)i85x_mem_size() - i85x_tseg_size() - stolen_size;
}
-static u32 __init i865_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i865_stolen_base(int num, int slot, int func,
+ size_t stolen_size)
{
+ u16 toud;
+
/*
* FIXME is the graphics stolen memory region
* always at TOUD? Ie. is it always the last
* one to be allocated by the BIOS?
*/
- return read_pci_config_16(0, 0, 0, I865_TOUD) << 16;
+ toud = read_pci_config_16(0, 0, 0, I865_TOUD);
+
+ return (phys_addr_t)toud << 16;
+}
+
+static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,
+ size_t stolen_size)
+{
+ u32 bsm;
+
+ /* Almost universally we can find the Graphics Base of Stolen Memory
+ * at register BSM (0x5c) in the igfx configuration space. On a few
+ * (desktop) machines this is also mirrored in the bridge device at
+ * different locations, or in the MCHBAR.
+ */
+ bsm = read_pci_config(num, slot, func, INTEL_BSM);
+
+ return (phys_addr_t)bsm & INTEL_BSM_MASK;
}
static size_t __init i830_stolen_size(int num, int slot, int func)
{
- size_t stolen_size;
u16 gmch_ctrl;
+ u16 gms;
gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
-
- switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
- case I830_GMCH_GMS_STOLEN_512:
- stolen_size = KB(512);
- break;
- case I830_GMCH_GMS_STOLEN_1024:
- stolen_size = MB(1);
- break;
- case I830_GMCH_GMS_STOLEN_8192:
- stolen_size = MB(8);
- break;
- case I830_GMCH_GMS_LOCAL:
- /* local memory isn't part of the normal address space */
- stolen_size = 0;
- break;
+ gms = gmch_ctrl & I830_GMCH_GMS_MASK;
+
+ switch (gms) {
+ case I830_GMCH_GMS_STOLEN_512: return KB(512);
+ case I830_GMCH_GMS_STOLEN_1024: return MB(1);
+ case I830_GMCH_GMS_STOLEN_8192: return MB(8);
+ /* local memory isn't part of the normal address space */
+ case I830_GMCH_GMS_LOCAL: return 0;
default:
- return 0;
+ WARN(1, "Unknown GMCH_CTRL value: %x!\n", gmch_ctrl);
}
- return stolen_size;
+ return 0;
}
static size_t __init gen3_stolen_size(int num, int slot, int func)
{
- size_t stolen_size;
u16 gmch_ctrl;
+ u16 gms;
gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
-
- switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
- case I855_GMCH_GMS_STOLEN_1M:
- stolen_size = MB(1);
- break;
- case I855_GMCH_GMS_STOLEN_4M:
- stolen_size = MB(4);
- break;
- case I855_GMCH_GMS_STOLEN_8M:
- stolen_size = MB(8);
- break;
- case I855_GMCH_GMS_STOLEN_16M:
- stolen_size = MB(16);
- break;
- case I855_GMCH_GMS_STOLEN_32M:
- stolen_size = MB(32);
- break;
- case I915_GMCH_GMS_STOLEN_48M:
- stolen_size = MB(48);
- break;
- case I915_GMCH_GMS_STOLEN_64M:
- stolen_size = MB(64);
- break;
- case G33_GMCH_GMS_STOLEN_128M:
- stolen_size = MB(128);
- break;
- case G33_GMCH_GMS_STOLEN_256M:
- stolen_size = MB(256);
- break;
- case INTEL_GMCH_GMS_STOLEN_96M:
- stolen_size = MB(96);
- break;
- case INTEL_GMCH_GMS_STOLEN_160M:
- stolen_size = MB(160);
- break;
- case INTEL_GMCH_GMS_STOLEN_224M:
- stolen_size = MB(224);
- break;
- case INTEL_GMCH_GMS_STOLEN_352M:
- stolen_size = MB(352);
- break;
+ gms = gmch_ctrl & I855_GMCH_GMS_MASK;
+
+ switch (gms) {
+ case I855_GMCH_GMS_STOLEN_1M: return MB(1);
+ case I855_GMCH_GMS_STOLEN_4M: return MB(4);
+ case I855_GMCH_GMS_STOLEN_8M: return MB(8);
+ case I855_GMCH_GMS_STOLEN_16M: return MB(16);
+ case I855_GMCH_GMS_STOLEN_32M: return MB(32);
+ case I915_GMCH_GMS_STOLEN_48M: return MB(48);
+ case I915_GMCH_GMS_STOLEN_64M: return MB(64);
+ case G33_GMCH_GMS_STOLEN_128M: return MB(128);
+ case G33_GMCH_GMS_STOLEN_256M: return MB(256);
+ case INTEL_GMCH_GMS_STOLEN_96M: return MB(96);
+ case INTEL_GMCH_GMS_STOLEN_160M:return MB(160);
+ case INTEL_GMCH_GMS_STOLEN_224M:return MB(224);
+ case INTEL_GMCH_GMS_STOLEN_352M:return MB(352);
default:
- stolen_size = 0;
- break;
+ WARN(1, "Unknown GMCH_CTRL value: %x!\n", gmch_ctrl);
}
- return stolen_size;
+ return 0;
}
static size_t __init gen6_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
+ u16 gms;
gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
- gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
- gmch_ctrl &= SNB_GMCH_GMS_MASK;
+ gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK;
- return gmch_ctrl << 25; /* 32 MB units */
+ return (size_t)gms * MB(32);
}
static size_t __init gen8_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
+ u16 gms;
gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
- gmch_ctrl >>= BDW_GMCH_GMS_SHIFT;
- gmch_ctrl &= BDW_GMCH_GMS_MASK;
- return gmch_ctrl << 25; /* 32 MB units */
+ gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK;
+
+ return (size_t)gms * MB(32);
}
static size_t __init chv_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
+ u16 gms;
gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
- gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
- gmch_ctrl &= SNB_GMCH_GMS_MASK;
+ gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK;
/*
* 0x0 to 0x10: 32MB increments starting at 0MB
* 0x11 to 0x16: 4MB increments starting at 8MB
* 0x17 to 0x1d: 4MB increments start at 36MB
*/
- if (gmch_ctrl < 0x11)
- return gmch_ctrl << 25;
- else if (gmch_ctrl < 0x17)
- return (gmch_ctrl - 0x11 + 2) << 22;
+ if (gms < 0x11)
+ return (size_t)gms * MB(32);
+ else if (gms < 0x17)
+ return (size_t)(gms - 0x11 + 2) * MB(4);
else
- return (gmch_ctrl - 0x17 + 9) << 22;
+ return (size_t)(gms - 0x17 + 9) * MB(4);
}
-struct intel_stolen_funcs {
- size_t (*size)(int num, int slot, int func);
- u32 (*base)(int num, int slot, int func, size_t size);
-};
-
static size_t __init gen9_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
+ u16 gms;
gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
- gmch_ctrl >>= BDW_GMCH_GMS_SHIFT;
- gmch_ctrl &= BDW_GMCH_GMS_MASK;
+ gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK;
- if (gmch_ctrl < 0xf0)
- return gmch_ctrl << 25; /* 32 MB units */
+ /* 0x0 to 0xef: 32MB increments starting at 0MB */
+ /* 0xf0 to 0xfe: 4MB increments starting at 4MB */
+ if (gms < 0xf0)
+ return (size_t)gms * MB(32);
else
- /* 4MB increments starting at 0xf0 for 4MB */
- return (gmch_ctrl - 0xf0 + 1) << 22;
+ return (size_t)(gms - 0xf0 + 1) * MB(4);
}
-typedef size_t (*stolen_size_fn)(int num, int slot, int func);
+struct intel_early_ops {
+ size_t (*stolen_size)(int num, int slot, int func);
+ phys_addr_t (*stolen_base)(int num, int slot, int func, size_t size);
+};
-static const struct intel_stolen_funcs i830_stolen_funcs __initconst = {
- .base = i830_stolen_base,
- .size = i830_stolen_size,
+static const struct intel_early_ops i830_early_ops __initconst = {
+ .stolen_base = i830_stolen_base,
+ .stolen_size = i830_stolen_size,
};
-static const struct intel_stolen_funcs i845_stolen_funcs __initconst = {
- .base = i845_stolen_base,
- .size = i830_stolen_size,
+static const struct intel_early_ops i845_early_ops __initconst = {
+ .stolen_base = i845_stolen_base,
+ .stolen_size = i830_stolen_size,
};
-static const struct intel_stolen_funcs i85x_stolen_funcs __initconst = {
- .base = i85x_stolen_base,
- .size = gen3_stolen_size,
+static const struct intel_early_ops i85x_early_ops __initconst = {
+ .stolen_base = i85x_stolen_base,
+ .stolen_size = gen3_stolen_size,
};
-static const struct intel_stolen_funcs i865_stolen_funcs __initconst = {
- .base = i865_stolen_base,
- .size = gen3_stolen_size,
+static const struct intel_early_ops i865_early_ops __initconst = {
+ .stolen_base = i865_stolen_base,
+ .stolen_size = gen3_stolen_size,
};
-static const struct intel_stolen_funcs gen3_stolen_funcs __initconst = {
- .base = intel_stolen_base,
- .size = gen3_stolen_size,
+static const struct intel_early_ops gen3_early_ops __initconst = {
+ .stolen_base = gen3_stolen_base,
+ .stolen_size = gen3_stolen_size,
};
-static const struct intel_stolen_funcs gen6_stolen_funcs __initconst = {
- .base = intel_stolen_base,
- .size = gen6_stolen_size,
+static const struct intel_early_ops gen6_early_ops __initconst = {
+ .stolen_base = gen3_stolen_base,
+ .stolen_size = gen6_stolen_size,
};
-static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = {
- .base = intel_stolen_base,
- .size = gen8_stolen_size,
+static const struct intel_early_ops gen8_early_ops __initconst = {
+ .stolen_base = gen3_stolen_base,
+ .stolen_size = gen8_stolen_size,
};
-static const struct intel_stolen_funcs gen9_stolen_funcs __initconst = {
- .base = intel_stolen_base,
- .size = gen9_stolen_size,
+static const struct intel_early_ops gen9_early_ops __initconst = {
+ .stolen_base = gen3_stolen_base,
+ .stolen_size = gen9_stolen_size,
};
-static const struct intel_stolen_funcs chv_stolen_funcs __initconst = {
- .base = intel_stolen_base,
- .size = chv_stolen_size,
+static const struct intel_early_ops chv_early_ops __initconst = {
+ .stolen_base = gen3_stolen_base,
+ .stolen_size = chv_stolen_size,
};
-static const struct pci_device_id intel_stolen_ids[] __initconst = {
- INTEL_I830_IDS(&i830_stolen_funcs),
- INTEL_I845G_IDS(&i845_stolen_funcs),
- INTEL_I85X_IDS(&i85x_stolen_funcs),
- INTEL_I865G_IDS(&i865_stolen_funcs),
- INTEL_I915G_IDS(&gen3_stolen_funcs),
- INTEL_I915GM_IDS(&gen3_stolen_funcs),
- INTEL_I945G_IDS(&gen3_stolen_funcs),
- INTEL_I945GM_IDS(&gen3_stolen_funcs),
- INTEL_VLV_M_IDS(&gen6_stolen_funcs),
- INTEL_VLV_D_IDS(&gen6_stolen_funcs),
- INTEL_PINEVIEW_IDS(&gen3_stolen_funcs),
- INTEL_I965G_IDS(&gen3_stolen_funcs),
- INTEL_G33_IDS(&gen3_stolen_funcs),
- INTEL_I965GM_IDS(&gen3_stolen_funcs),
- INTEL_GM45_IDS(&gen3_stolen_funcs),
- INTEL_G45_IDS(&gen3_stolen_funcs),
- INTEL_IRONLAKE_D_IDS(&gen3_stolen_funcs),
- INTEL_IRONLAKE_M_IDS(&gen3_stolen_funcs),
- INTEL_SNB_D_IDS(&gen6_stolen_funcs),
- INTEL_SNB_M_IDS(&gen6_stolen_funcs),
- INTEL_IVB_M_IDS(&gen6_stolen_funcs),
- INTEL_IVB_D_IDS(&gen6_stolen_funcs),
- INTEL_HSW_D_IDS(&gen6_stolen_funcs),
- INTEL_HSW_M_IDS(&gen6_stolen_funcs),
- INTEL_BDW_M_IDS(&gen8_stolen_funcs),
- INTEL_BDW_D_IDS(&gen8_stolen_funcs),
- INTEL_CHV_IDS(&chv_stolen_funcs),
- INTEL_SKL_IDS(&gen9_stolen_funcs),
- INTEL_BXT_IDS(&gen9_stolen_funcs),
- INTEL_KBL_IDS(&gen9_stolen_funcs),
+static const struct pci_device_id intel_early_ids[] __initconst = {
+ INTEL_I830_IDS(&i830_early_ops),
+ INTEL_I845G_IDS(&i845_early_ops),
+ INTEL_I85X_IDS(&i85x_early_ops),
+ INTEL_I865G_IDS(&i865_early_ops),
+ INTEL_I915G_IDS(&gen3_early_ops),
+ INTEL_I915GM_IDS(&gen3_early_ops),
+ INTEL_I945G_IDS(&gen3_early_ops),
+ INTEL_I945GM_IDS(&gen3_early_ops),
+ INTEL_VLV_M_IDS(&gen6_early_ops),
+ INTEL_VLV_D_IDS(&gen6_early_ops),
+ INTEL_PINEVIEW_IDS(&gen3_early_ops),
+ INTEL_I965G_IDS(&gen3_early_ops),
+ INTEL_G33_IDS(&gen3_early_ops),
+ INTEL_I965GM_IDS(&gen3_early_ops),
+ INTEL_GM45_IDS(&gen3_early_ops),
+ INTEL_G45_IDS(&gen3_early_ops),
+ INTEL_IRONLAKE_D_IDS(&gen3_early_ops),
+ INTEL_IRONLAKE_M_IDS(&gen3_early_ops),
+ INTEL_SNB_D_IDS(&gen6_early_ops),
+ INTEL_SNB_M_IDS(&gen6_early_ops),
+ INTEL_IVB_M_IDS(&gen6_early_ops),
+ INTEL_IVB_D_IDS(&gen6_early_ops),
+ INTEL_HSW_D_IDS(&gen6_early_ops),
+ INTEL_HSW_M_IDS(&gen6_early_ops),
+ INTEL_BDW_M_IDS(&gen8_early_ops),
+ INTEL_BDW_D_IDS(&gen8_early_ops),
+ INTEL_CHV_IDS(&chv_early_ops),
+ INTEL_SKL_IDS(&gen9_early_ops),
+ INTEL_BXT_IDS(&gen9_early_ops),
+ INTEL_KBL_IDS(&gen9_early_ops),
};
-static void __init intel_graphics_stolen(int num, int slot, int func)
+static void __init
+intel_graphics_stolen(int num, int slot, int func,
+ const struct intel_early_ops *early_ops)
{
+ phys_addr_t base, end;
size_t size;
+
+ size = early_ops->stolen_size(num, slot, func);
+ base = early_ops->stolen_base(num, slot, func, size);
+
+ if (!size || !base)
+ return;
+
+ end = base + size - 1;
+ printk(KERN_INFO "Reserving Intel graphics memory at %pa-%pa\n",
+ &base, &end);
+
+ /* Mark this space as reserved */
+ e820_add_region(base, size, E820_RESERVED);
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
+static void __init intel_graphics_quirks(int num, int slot, int func)
+{
+ const struct intel_early_ops *early_ops;
+ u16 device;
int i;
- u32 start;
- u16 device, subvendor, subdevice;
device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
- subvendor = read_pci_config_16(num, slot, func,
- PCI_SUBSYSTEM_VENDOR_ID);
- subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID);
-
- for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) {
- if (intel_stolen_ids[i].device == device) {
- const struct intel_stolen_funcs *stolen_funcs =
- (const struct intel_stolen_funcs *)intel_stolen_ids[i].driver_data;
- size = stolen_funcs->size(num, slot, func);
- start = stolen_funcs->base(num, slot, func, size);
- if (size && start) {
- printk(KERN_INFO "Reserving Intel graphics stolen memory at 0x%x-0x%x\n",
- start, start + (u32)size - 1);
- /* Mark this space as reserved */
- e820_add_region(start, size, E820_RESERVED);
- sanitize_e820_map(e820.map,
- ARRAY_SIZE(e820.map),
- &e820.nr_map);
- }
- return;
- }
+
+ for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
+ kernel_ulong_t driver_data = intel_early_ids[i].driver_data;
+
+ if (intel_early_ids[i].device != device)
+ continue;
+
+ early_ops = (typeof(early_ops))driver_data;
+
+ intel_graphics_stolen(num, slot, func, early_ops);
+
+ return;
}
}
@@ -690,7 +674,7 @@ static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
- QFLAG_APPLY_ONCE, intel_graphics_stolen },
+ QFLAG_APPLY_ONCE, intel_graphics_quirks },
/*
* HPET on the current version of the Baytrail platform has accuracy
* problems: it will halt in deep idle state - so we disable it.
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 9e231d88bb33..a184c210efba 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -159,8 +159,8 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
- ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
- config_enabled(CONFIG_IA32_EMULATION));
+ ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
+ IS_ENABLED(CONFIG_IA32_EMULATION));
if (!access_ok(VERIFY_WRITE, buf, size))
return -EACCES;
@@ -268,8 +268,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
u64 xfeatures = 0;
int fx_only = 0;
- ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
- config_enabled(CONFIG_IA32_EMULATION));
+ ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
+ IS_ENABLED(CONFIG_IA32_EMULATION));
if (!buf) {
fpu__clear(fpu);
@@ -416,8 +416,8 @@ void fpu__init_prepare_fx_sw_frame(void)
fx_sw_reserved.xfeatures = xfeatures_mask;
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
- if (config_enabled(CONFIG_IA32_EMULATION) ||
- config_enabled(CONFIG_X86_32)) {
+ if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
+ IS_ENABLED(CONFIG_X86_32)) {
int fsave_header_size = sizeof(struct fregs_state);
fx_sw_reserved_ia32 = fx_sw_reserved;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3d747070fe67..ed16e58658a4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1019,7 +1019,6 @@ void hpet_disable(void)
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
-#include <asm/rtc.h>
#define DEFAULT_RTC_INT_FREQ 64
#define DEFAULT_RTC_SHIFT 6
@@ -1243,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- get_rtc_time(&curr_time);
+ mc146818_set_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 2bcfb5f2bc44..8771766d46b6 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -36,13 +36,14 @@
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
+#include <asm/user.h>
/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index d40ee8a38fed..1f9b878ef5ef 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,4 +1,5 @@
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/spinlock_types.h>
#include <asm/checksum.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index efb82f07b29c..6ebe00cb4a3b 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -3,7 +3,7 @@
*
*/
#include <linux/clockchips.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/timex.h>
#include <linux/i8253.h>
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index a979b5bd2fc0..50c89e8a95f2 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -6,7 +6,7 @@
* outb_p/inb_p API uses.
*/
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/dmi.h>
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index c627bf8d98ad..1f38d9a4d9de 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -8,7 +8,6 @@
* io_apic.c.)
*/
-#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 206d0b90a3ab..4a7903714065 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -11,7 +11,6 @@
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
-#include <linux/module.h>
#include <linux/delay.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index dc1404bf8e4b..bdb83e431d89 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -8,7 +8,7 @@
*/
#include <linux/debugfs.h>
#include <linux/uaccess.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/stat.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1ef5e48b3a36..1726c4c12336 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -21,7 +21,7 @@
*/
#include <linux/context_tracking.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 97340f2c437c..068c4a929de6 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,7 +16,6 @@
#include <linux/mc146818rtc.h>
#include <linux/bitops.h>
#include <linux/acpi.h>
-#include <linux/module.h>
#include <linux/smp.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 04b132a767f1..bfe4d6c96fbd 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 33ee3e0efd65..1939a0269377 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -3,7 +3,7 @@
* compiled in a FTRACE-compatible way.
*/
#include <linux/spinlock.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/jump_label.h>
#include <asm/paravirt.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7b3b3f24c3ea..ad5bc9578a73 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -19,7 +19,8 @@
*/
#include <linux/errno.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 833b1d329c47..5d400ba1349d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -340,7 +340,7 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems,enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct iommu_table *tbl = find_iommu_table(dev);
struct scatterlist *s;
@@ -364,7 +364,7 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct iommu_table *tbl = find_iommu_table(dev);
struct scatterlist *s;
@@ -396,7 +396,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
return nelems;
error:
- calgary_unmap_sg(dev, sg, nelems, dir, NULL);
+ calgary_unmap_sg(dev, sg, nelems, dir, 0);
for_each_sg(sg, s, nelems, i) {
sg->dma_address = DMA_ERROR_CODE;
sg->dma_length = 0;
@@ -407,7 +407,7 @@ error:
static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vaddr = page_address(page) + offset;
unsigned long uaddr;
@@ -422,7 +422,7 @@ static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
@@ -432,7 +432,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
}
static void* calgary_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
+ dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
{
void *ret = NULL;
dma_addr_t mapping;
@@ -466,7 +466,7 @@ error:
static void calgary_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned int npages;
struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 6ba014c61d62..d30c37750765 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -77,7 +77,7 @@ void __init pci_iommu_alloc(void)
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long dma_mask;
struct page *page;
@@ -120,7 +120,7 @@ again:
}
void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, struct dma_attrs *attrs)
+ dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = virt_to_page(vaddr);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index da15918d1c81..00e71ce396a8 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -28,7 +28,7 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
dma_addr_t bus = page_to_phys(page) + offset;
WARN_ON(size == 0);
@@ -55,7 +55,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
*/
static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct scatterlist *s;
int i;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 7c577a178859..b47edb8f5256 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -2,7 +2,7 @@
#include <linux/pci.h>
#include <linux/cache.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/swiotlb.h>
#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
@@ -16,7 +16,7 @@ int swiotlb __read_mostly;
void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vaddr;
@@ -37,7 +37,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
swiotlb_free_coherent(dev, size, vaddr, dma_addr);
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 92f70147a9a6..0c5315d322c8 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -3,7 +3,7 @@
* Copyright (c) 2015, Intel Corporation.
*/
#include <linux/platform_device.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/ioport.h>
static int found(u64 start, u64 end, void *data)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 59f68f1d734b..62c0b0ea2ce4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,7 +7,8 @@
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/random.h>
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f950917528b..d86be29c38c7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -25,7 +25,7 @@
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/mc146818rtc.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/personality.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6e789ca1f841..63236d8f84bf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -26,7 +26,7 @@
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 600edd225e81..f79576a541ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
case offsetof(struct user32, regs.orig_eax):
/*
- * A 32-bit debugger setting orig_eax means to restore
- * the state of the task restarting a 32-bit syscall.
- * Make sure we interpret the -ERESTART* codes correctly
- * in case the task is not actually still sitting at the
- * exit from a 32-bit syscall with TS_COMPAT still set.
+ * Warning: bizarre corner case fixup here. A 32-bit
+ * debugger setting orig_eax to -1 wants to disable
+ * syscall restart. Make sure that the syscall
+ * restart code sign-extends orig_ax. Also make sure
+ * we interpret the -ERESTART* codes correctly if
+ * loaded into regs->ax in case the task is not
+ * actually still sitting at the exit from a 32-bit
+ * syscall with TS_COMPAT still set.
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- task_thread_info(child)->status |= TS_COMPAT;
+ task_thread_info(child)->status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 06c58ce46762..3599404e3089 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
+ version = pvclock_read_begin(src);
flags = src->flags;
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ } while (pvclock_read_retry(src, version));
return flags & valid_flags;
}
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = __pvclock_read_cycles(src, &ret, &flags);
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ version = pvclock_read_begin(src);
+ ret = __pvclock_read_cycles(src);
+ flags = src->flags;
+ } while (pvclock_read_retry(src, version));
if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
src->flags &= ~PVCLOCK_GUEST_STOPPED;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 15ed70f8278b..63bf27d972b7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,6 +1,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/pm.h>
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index eceaa082ec3f..79c6311cd912 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -13,7 +13,6 @@
#include <asm/x86_init.h>
#include <asm/time.h>
#include <asm/intel-mid.h>
-#include <asm/rtc.h>
#include <asm/setup.h>
#ifdef CONFIG_X86_32
@@ -47,7 +46,7 @@ int mach_set_rtc_mmss(const struct timespec *now)
rtc_time_to_tm(nowtime, &tm);
if (!rtc_valid_tm(&tm)) {
- retval = set_rtc_time(&tm);
+ retval = mc146818_set_time(&tm);
if (retval)
printk(KERN_ERR "%s: RTC write failed with error %d\n",
__func__, retval);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6cb2b02fcc87..991b77986d57 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -36,7 +36,7 @@
#include <linux/console.h>
#include <linux/root_dev.h>
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 22cc2f9f8aec..04cb3212db2d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -146,7 +146,7 @@ static int restore_sigcontext(struct pt_regs *regs,
buf = (void __user *)buf_val;
} get_user_catch(err);
- err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
+ err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
force_iret();
@@ -245,14 +245,14 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
struct fpu *fpu = &current->thread.fpu;
/* redzone */
- if (config_enabled(CONFIG_X86_64))
+ if (IS_ENABLED(CONFIG_X86_64))
sp -= 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
- } else if (config_enabled(CONFIG_X86_32) &&
+ } else if (IS_ENABLED(CONFIG_X86_32) &&
!onsigstack &&
(regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
@@ -262,7 +262,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
if (fpu->fpstate_active) {
- sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+ sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
@@ -662,18 +662,18 @@ badframe:
static inline int is_ia32_compat_frame(void)
{
- return config_enabled(CONFIG_IA32_EMULATION) &&
+ return IS_ENABLED(CONFIG_IA32_EMULATION) &&
test_thread_flag(TIF_IA32);
}
static inline int is_ia32_frame(void)
{
- return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+ return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
}
static inline int is_x32_frame(void)
{
- return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+ return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
}
static int
@@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#ifdef CONFIG_X86_64
- if (in_ia32_syscall())
+ /*
+ * This function is fundamentally broken as currently
+ * implemented.
+ *
+ * The idea is that we want to trigger a call to the
+ * restart_block() syscall and that we want in_ia32_syscall(),
+ * in_x32_syscall(), etc. to match whatever they were in the
+ * syscall being restarted. We assume that the syscall
+ * instruction at (regs->ip - 2) matches whatever syscall
+ * instruction we used to enter in the first place.
+ *
+ * The problem is that we can get here when ptrace pokes
+ * syscall-like values into regs even if we're not in a syscall
+ * at all.
+ *
+ * For now, we maintain historical behavior and guess based on
+ * stored state. We could do better by saving the actual
+ * syscall arch in restart_block or (with caveats on x32) by
+ * checking if regs->ip points to 'int $0x80'. The current
+ * behavior is incorrect if a tracer has a different bitness
+ * than the tracee.
+ */
+#ifdef CONFIG_IA32_EMULATION
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c93609c97406..2a6e84a30a54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -43,7 +43,7 @@
#include <linux/init.h>
#include <linux/smp.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/bootmem.h>
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 9ee98eefc44d..4738f5e0f2ab 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -5,7 +5,7 @@
*/
#include <linux/sched.h>
#include <linux/stacktrace.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index cb4a01b41e27..222e84e2432e 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -9,7 +9,6 @@
* as published by the Free Software Foundation; version 2
* of the License.
*/
-#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/asm.h>
@@ -74,7 +73,3 @@ int rodata_test(void)
return 0;
}
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
-MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 00f03d82e69a..b70ca12dd389 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -21,7 +21,7 @@
#include <linux/kdebug.h>
#include <linux/kgdb.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <linux/string.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8fb4b6abac0e..78b9cb5a26af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -3,7 +3,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f1aebfb49c36..95e49f6e4fc3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -1,7 +1,8 @@
/* Exports for assembly files.
All C exports should go in the respective C files. */
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/spinlock_types.h>
#include <linux/smp.h>
#include <net/checksum.h>
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 58b459296e13..76c5e52436c4 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -5,7 +5,7 @@
*/
#include <linux/init.h>
#include <linux/ioport.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/pci.h>
#include <asm/bios_ebda.h>
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 639a6e34500c..ab8e32f7b9a8 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -32,7 +32,6 @@ config KVM
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
- select KVM_APIC_ARCHITECTURE
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 643565364497..3235e0fe7792 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -13,7 +13,7 @@
*/
#include <linux/kvm_host.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a2f24af3c999..4e95d3eb2955 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,7 +22,6 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
-#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a4bf5b45d65a..5fb6c620180e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -645,7 +645,6 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
.write = speaker_ioport_write,
};
-/* Caller must hold slots_lock */
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
@@ -690,6 +689,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
kvm_pit_set_reinject(pit, true);
+ mutex_lock(&kvm->slots_lock);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
KVM_PIT_MEM_LENGTH, &pit->dev);
@@ -704,12 +704,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
if (ret < 0)
goto fail_register_speaker;
}
+ mutex_unlock(&kvm->slots_lock);
return pit;
fail_register_speaker:
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
fail_register_pit:
+ mutex_unlock(&kvm->slots_lock);
kvm_pit_set_reinject(pit, false);
kthread_stop(pit->worker_task);
fail_kthread:
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index 3069281904d3..b181426f67b4 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -25,12 +25,10 @@
#include <linux/list.h>
#include <linux/kvm_host.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/stat.h>
-#include <linux/dmar.h>
#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
#include "assigned-dev.h"
static bool allow_unsafe_assigned_interrupts;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 95fcc7b13866..60d91c9d160c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -20,7 +20,7 @@
*
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kvm_host.h>
#include "irq.h"
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 61ebdc13a29a..035731eb3897 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
+
#endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index dfb4c6476877..25810b144b58 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
return r;
}
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
{
- trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+ trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
+ (u64)e->msi.address_hi << 32 : 0),
+ e->msi.data);
irq->dest_id = (e->msi.address_lo &
MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+ if (kvm->arch.x2apic_format)
+ irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
irq->vector = (e->msi.data &
MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
}
EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
+static inline bool kvm_msi_route_invalid(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e)
+{
+ return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
+}
+
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, bool line_status)
{
struct kvm_lapic_irq irq;
+ if (kvm_msi_route_invalid(kvm, e))
+ return -EINVAL;
+
if (!level)
return -1;
- kvm_set_msi_irq(e, &irq);
+ kvm_set_msi_irq(kvm, e, &irq);
return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
}
@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
return -EWOULDBLOCK;
- kvm_set_msi_irq(e, &irq);
+ if (kvm_msi_route_invalid(kvm, e))
+ return -EINVAL;
+
+ kvm_set_msi_irq(kvm, e, &irq);
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r;
@@ -248,7 +264,8 @@ static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -285,6 +302,9 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
+
+ if (kvm_msi_route_invalid(kvm, e))
+ goto out;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
@@ -388,21 +408,16 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm->arch.nr_reserved_ioapic_pins);
for (i = 0; i < nr_ioapic_pins; ++i) {
hlist_for_each_entry(entry, &table->map[i], link) {
- u32 dest_id, dest_mode;
- bool level;
+ struct kvm_lapic_irq irq;
if (entry->type != KVM_IRQ_ROUTING_MSI)
continue;
- dest_id = (entry->msi.address_lo >> 12) & 0xff;
- dest_mode = (entry->msi.address_lo >> 2) & 0x1;
- level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
- if (level && kvm_apic_match_dest(vcpu, NULL, 0,
- dest_id, dest_mode)) {
- u32 vector = entry->msi.data & 0xff;
-
- __set_bit(vector,
- ioapic_handled_vectors);
- }
+
+ kvm_set_msi_irq(vcpu->kvm, entry, &irq);
+
+ if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
+ irq.dest_id, irq.dest_mode))
+ __set_bit(irq.vector, ioapic_handled_vectors);
}
}
srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a397200281c1..b62c85229711 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -25,7 +25,7 @@
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
@@ -115,26 +115,43 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
-/* The logical map is definitely wrong if we have multiple
- * modes at the same time. (Physical map is always right.)
- */
-static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
-{
- return !(map->mode & (map->mode - 1));
+static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
+ u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
+ switch (map->mode) {
+ case KVM_APIC_MODE_X2APIC: {
+ u32 offset = (dest_id >> 16) * 16;
+ u32 max_apic_id = map->max_apic_id;
+
+ if (offset <= max_apic_id) {
+ u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+
+ *cluster = &map->phys_map[offset];
+ *mask = dest_id & (0xffff >> (16 - cluster_size));
+ } else {
+ *mask = 0;
+ }
+
+ return true;
+ }
+ case KVM_APIC_MODE_XAPIC_FLAT:
+ *cluster = map->xapic_flat_map;
+ *mask = dest_id & 0xff;
+ return true;
+ case KVM_APIC_MODE_XAPIC_CLUSTER:
+ *cluster = map->xapic_cluster_map[dest_id >> 4];
+ *mask = dest_id & 0xf;
+ return true;
+ default:
+ /* Not optimized. */
+ return false;
+ }
}
-static inline void
-apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+static void kvm_apic_map_free(struct rcu_head *rcu)
{
- unsigned lid_bits;
-
- BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4);
- BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8);
- BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16);
- lid_bits = map->mode;
+ struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
- *cid = dest_id >> lid_bits;
- *lid = dest_id & ((1 << lid_bits) - 1);
+ kvfree(map);
}
static void recalculate_apic_map(struct kvm *kvm)
@@ -142,17 +159,26 @@ static void recalculate_apic_map(struct kvm *kvm)
struct kvm_apic_map *new, *old = NULL;
struct kvm_vcpu *vcpu;
int i;
-
- new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+ u32 max_id = 255;
mutex_lock(&kvm->arch.apic_map_lock);
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ if (kvm_apic_present(vcpu))
+ max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
+
+ new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
+ sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
+
if (!new)
goto out;
+ new->max_apic_id = max_id;
+
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvm_lapic *apic = vcpu->arch.apic;
- u16 cid, lid;
+ struct kvm_lapic **cluster;
+ u16 mask;
u32 ldr, aid;
if (!kvm_apic_present(vcpu))
@@ -161,7 +187,7 @@ static void recalculate_apic_map(struct kvm *kvm)
aid = kvm_apic_id(apic);
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
- if (aid < ARRAY_SIZE(new->phys_map))
+ if (aid <= new->max_apic_id)
new->phys_map[aid] = apic;
if (apic_x2apic_mode(apic)) {
@@ -174,13 +200,11 @@ static void recalculate_apic_map(struct kvm *kvm)
new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
}
- if (!kvm_apic_logical_map_valid(new))
+ if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
continue;
- apic_logical_id(new, ldr, &cid, &lid);
-
- if (lid && cid < ARRAY_SIZE(new->logical_map))
- new->logical_map[cid][ffs(lid) - 1] = apic;
+ if (mask)
+ cluster[ffs(mask) - 1] = apic;
}
out:
old = rcu_dereference_protected(kvm->arch.apic_map,
@@ -189,7 +213,7 @@ out:
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
- kfree_rcu(old, rcu);
+ call_rcu(&old->rcu, kvm_apic_map_free);
kvm_make_scan_ioapic_request(kvm);
}
@@ -210,7 +234,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
}
}
-static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
kvm_lapic_set_reg(apic, APIC_ID, id << 24);
recalculate_apic_map(apic->vcpu->kvm);
@@ -222,11 +246,11 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
recalculate_apic_map(apic->vcpu->kvm);
}
-static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
- kvm_lapic_set_reg(apic, APIC_ID, id << 24);
+ kvm_lapic_set_reg(apic, APIC_ID, id);
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
recalculate_apic_map(apic->vcpu->kvm);
}
@@ -599,17 +623,30 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
}
}
-/* KVM APIC implementation has two quirks
- * - dest always begins at 0 while xAPIC MDA has offset 24,
- * - IOxAPIC messages have to be delivered (directly) to x2APIC.
+/* The KVM local APIC implementation has two quirks:
+ *
+ * - the xAPIC MDA stores the destination at bits 24-31, while this
+ * is not true of struct kvm_lapic_irq's dest_id field. This is
+ * just a quirk in the API and is not problematic.
+ *
+ * - in-kernel IOAPIC messages have to be delivered directly to
+ * x2APIC, because the kernel does not support interrupt remapping.
+ * In order to support broadcast without interrupt remapping, x2APIC
+ * rewrites the destination of non-IPI messages from APIC_BROADCAST
+ * to X2APIC_BROADCAST.
+ *
+ * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is
+ * important when userspace wants to use x2APIC-format MSIs, because
+ * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
*/
-static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
- struct kvm_lapic *target)
+static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
+ struct kvm_lapic *source, struct kvm_lapic *target)
{
bool ipi = source != NULL;
bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
- if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+ if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
+ !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
return X2APIC_BROADCAST;
return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
@@ -619,7 +656,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode)
{
struct kvm_lapic *target = vcpu->arch.apic;
- u32 mda = kvm_apic_mda(dest, source, target);
+ u32 mda = kvm_apic_mda(vcpu, dest, source, target);
apic_debug("target %p, source %p, dest 0x%x, "
"dest_mode 0x%x, short_hand 0x%x\n",
@@ -671,102 +708,126 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
}
}
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
+ struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
{
- struct kvm_apic_map *map;
- unsigned long bitmap = 1;
- struct kvm_lapic **dst;
- int i;
- bool ret, x2apic_ipi;
+ if (kvm->arch.x2apic_broadcast_quirk_disabled) {
+ if ((irq->dest_id == APIC_BROADCAST &&
+ map->mode != KVM_APIC_MODE_X2APIC))
+ return true;
+ if (irq->dest_id == X2APIC_BROADCAST)
+ return true;
+ } else {
+ bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
+ if (irq->dest_id == (x2apic_ipi ?
+ X2APIC_BROADCAST : APIC_BROADCAST))
+ return true;
+ }
- *r = -1;
+ return false;
+}
- if (irq->shorthand == APIC_DEST_SELF) {
- *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
- return true;
- }
+/* Return true if the interrupt can be handled by using *bitmap as index mask
+ * for valid destinations in *dst array.
+ * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
+ * Note: we may have zero kvm_lapic destinations when we return true, which
+ * means that the interrupt should be dropped. In this case, *bitmap would be
+ * zero and *dst undefined.
+ */
+static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
+ struct kvm_lapic **src, struct kvm_lapic_irq *irq,
+ struct kvm_apic_map *map, struct kvm_lapic ***dst,
+ unsigned long *bitmap)
+{
+ int i, lowest;
- if (irq->shorthand)
+ if (irq->shorthand == APIC_DEST_SELF && src) {
+ *dst = src;
+ *bitmap = 1;
+ return true;
+ } else if (irq->shorthand)
return false;
- x2apic_ipi = src && apic_x2apic_mode(src);
- if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+ if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
return false;
- ret = true;
- rcu_read_lock();
- map = rcu_dereference(kvm->arch.apic_map);
-
- if (!map) {
- ret = false;
- goto out;
+ if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+ if (irq->dest_id > map->max_apic_id) {
+ *bitmap = 0;
+ } else {
+ *dst = &map->phys_map[irq->dest_id];
+ *bitmap = 1;
+ }
+ return true;
}
- if (irq->dest_mode == APIC_DEST_PHYSICAL) {
- if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
- goto out;
+ *bitmap = 0;
+ if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
+ (u16 *)bitmap))
+ return false;
- dst = &map->phys_map[irq->dest_id];
- } else {
- u16 cid;
+ if (!kvm_lowest_prio_delivery(irq))
+ return true;
- if (!kvm_apic_logical_map_valid(map)) {
- ret = false;
- goto out;
+ if (!kvm_vector_hashing_enabled()) {
+ lowest = -1;
+ for_each_set_bit(i, bitmap, 16) {
+ if (!(*dst)[i])
+ continue;
+ if (lowest < 0)
+ lowest = i;
+ else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
+ (*dst)[lowest]->vcpu) < 0)
+ lowest = i;
}
+ } else {
+ if (!*bitmap)
+ return true;
- apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+ lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
+ bitmap, 16);
- if (cid >= ARRAY_SIZE(map->logical_map))
- goto out;
+ if (!(*dst)[lowest]) {
+ kvm_apic_disabled_lapic_found(kvm);
+ *bitmap = 0;
+ return true;
+ }
+ }
- dst = map->logical_map[cid];
+ *bitmap = (lowest >= 0) ? 1 << lowest : 0;
- if (!kvm_lowest_prio_delivery(irq))
- goto set_irq;
+ return true;
+}
- if (!kvm_vector_hashing_enabled()) {
- int l = -1;
- for_each_set_bit(i, &bitmap, 16) {
- if (!dst[i])
- continue;
- if (l < 0)
- l = i;
- else if (kvm_apic_compare_prio(dst[i]->vcpu,
- dst[l]->vcpu) < 0)
- l = i;
- }
- bitmap = (l >= 0) ? 1 << l : 0;
- } else {
- int idx;
- unsigned int dest_vcpus;
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+{
+ struct kvm_apic_map *map;
+ unsigned long bitmap;
+ struct kvm_lapic **dst = NULL;
+ int i;
+ bool ret;
- dest_vcpus = hweight16(bitmap);
- if (dest_vcpus == 0)
- goto out;
+ *r = -1;
- idx = kvm_vector_to_index(irq->vector,
- dest_vcpus, &bitmap, 16);
+ if (irq->shorthand == APIC_DEST_SELF) {
+ *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
+ return true;
+ }
- if (!dst[idx]) {
- kvm_apic_disabled_lapic_found(kvm);
- goto out;
- }
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
- bitmap = (idx >= 0) ? 1 << idx : 0;
+ ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
+ if (ret)
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i])
+ continue;
+ if (*r < 0)
+ *r = 0;
+ *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
- }
-set_irq:
- for_each_set_bit(i, &bitmap, 16) {
- if (!dst[i])
- continue;
- if (*r < 0)
- *r = 0;
- *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
- }
-out:
rcu_read_unlock();
return ret;
}
@@ -789,8 +850,9 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
struct kvm_apic_map *map;
+ unsigned long bitmap;
+ struct kvm_lapic **dst = NULL;
bool ret = false;
- struct kvm_lapic *dst = NULL;
if (irq->shorthand)
return false;
@@ -798,69 +860,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
- if (!map)
- goto out;
-
- if (irq->dest_mode == APIC_DEST_PHYSICAL) {
- if (irq->dest_id == 0xFF)
- goto out;
-
- if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
- goto out;
-
- dst = map->phys_map[irq->dest_id];
- if (dst && kvm_apic_present(dst->vcpu))
- *dest_vcpu = dst->vcpu;
- else
- goto out;
- } else {
- u16 cid;
- unsigned long bitmap = 1;
- int i, r = 0;
-
- if (!kvm_apic_logical_map_valid(map))
- goto out;
-
- apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
-
- if (cid >= ARRAY_SIZE(map->logical_map))
- goto out;
-
- if (kvm_vector_hashing_enabled() &&
- kvm_lowest_prio_delivery(irq)) {
- int idx;
- unsigned int dest_vcpus;
+ if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
+ hweight16(bitmap) == 1) {
+ unsigned long i = find_first_bit(&bitmap, 16);
- dest_vcpus = hweight16(bitmap);
- if (dest_vcpus == 0)
- goto out;
-
- idx = kvm_vector_to_index(irq->vector, dest_vcpus,
- &bitmap, 16);
-
- dst = map->logical_map[cid][idx];
- if (!dst) {
- kvm_apic_disabled_lapic_found(kvm);
- goto out;
- }
-
- *dest_vcpu = dst->vcpu;
- } else {
- for_each_set_bit(i, &bitmap, 16) {
- dst = map->logical_map[cid][i];
- if (++r == 2)
- goto out;
- }
-
- if (dst && kvm_apic_present(dst->vcpu))
- *dest_vcpu = dst->vcpu;
- else
- goto out;
+ if (dst[i]) {
+ *dest_vcpu = dst[i]->vcpu;
+ ret = true;
}
}
- ret = true;
-out:
rcu_read_unlock();
return ret;
}
@@ -1127,12 +1136,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
return 0;
switch (offset) {
- case APIC_ID:
- if (apic_x2apic_mode(apic))
- val = kvm_apic_id(apic);
- else
- val = kvm_apic_id(apic) << 24;
- break;
case APIC_ARBPRI:
apic_debug("Access APIC ARBPRI register which is for P6\n");
break;
@@ -1314,6 +1317,111 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
}
+static void start_sw_tscdeadline(struct kvm_lapic *apic)
+{
+ u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+ u64 ns = 0;
+ ktime_t expire;
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+ unsigned long flags;
+ ktime_t now;
+
+ if (unlikely(!tscdeadline || !this_tsc_khz))
+ return;
+
+ local_irq_save(flags);
+
+ now = apic->lapic_timer.timer.base->get_time();
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+ if (likely(tscdeadline > guest_tsc)) {
+ ns = (tscdeadline - guest_tsc) * 1000000ULL;
+ do_div(ns, this_tsc_khz);
+ expire = ktime_add_ns(now, ns);
+ expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
+ hrtimer_start(&apic->lapic_timer.timer,
+ expire, HRTIMER_MODE_ABS_PINNED);
+ } else
+ apic_timer_expired(apic);
+
+ local_irq_restore(flags);
+}
+
+bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
+{
+ if (!lapic_in_kernel(vcpu))
+ return false;
+
+ return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
+
+static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+{
+ kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+ apic->lapic_timer.hv_timer_in_use = false;
+}
+
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ WARN_ON(swait_active(&vcpu->wq));
+ cancel_hv_tscdeadline(apic);
+ apic_timer_expired(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
+static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+{
+ u64 tscdeadline = apic->lapic_timer.tscdeadline;
+
+ if (atomic_read(&apic->lapic_timer.pending) ||
+ kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_tscdeadline(apic);
+ } else {
+ apic->lapic_timer.hv_timer_in_use = true;
+ hrtimer_cancel(&apic->lapic_timer.timer);
+
+ /* In case the sw timer triggered in the window */
+ if (atomic_read(&apic->lapic_timer.pending))
+ cancel_hv_tscdeadline(apic);
+ }
+ trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+ apic->lapic_timer.hv_timer_in_use);
+ return apic->lapic_timer.hv_timer_in_use;
+}
+
+void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ WARN_ON(apic->lapic_timer.hv_timer_in_use);
+
+ if (apic_lvtt_tscdeadline(apic))
+ start_hv_tscdeadline(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
+
+void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ /* Possibly the TSC deadline timer is not enabled yet */
+ if (!apic->lapic_timer.hv_timer_in_use)
+ return;
+
+ cancel_hv_tscdeadline(apic);
+
+ if (atomic_read(&apic->lapic_timer.pending))
+ return;
+
+ start_sw_tscdeadline(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
+
static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now;
@@ -1360,32 +1468,8 @@ static void start_apic_timer(struct kvm_lapic *apic)
ktime_to_ns(ktime_add_ns(now,
apic->lapic_timer.period)));
} else if (apic_lvtt_tscdeadline(apic)) {
- /* lapic timer in tsc deadline mode */
- u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
- u64 ns = 0;
- ktime_t expire;
- struct kvm_vcpu *vcpu = apic->vcpu;
- unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
- unsigned long flags;
-
- if (unlikely(!tscdeadline || !this_tsc_khz))
- return;
-
- local_irq_save(flags);
-
- now = apic->lapic_timer.timer.base->get_time();
- guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- if (likely(tscdeadline > guest_tsc)) {
- ns = (tscdeadline - guest_tsc) * 1000000ULL;
- do_div(ns, this_tsc_khz);
- expire = ktime_add_ns(now, ns);
- expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
- expire, HRTIMER_MODE_ABS_PINNED);
- } else
- apic_timer_expired(apic);
-
- local_irq_restore(flags);
+ if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
+ start_sw_tscdeadline(apic);
}
}
@@ -1413,7 +1497,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
switch (reg) {
case APIC_ID: /* Local APIC ID */
if (!apic_x2apic_mode(apic))
- kvm_apic_set_id(apic, val >> 24);
+ kvm_apic_set_xapic_id(apic, val >> 24);
else
ret = 1;
break;
@@ -1674,9 +1758,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
/* update jump label if enable bit changes */
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
- if (value & MSR_IA32_APICBASE_ENABLE)
+ if (value & MSR_IA32_APICBASE_ENABLE) {
+ kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
static_key_slow_dec_deferred(&apic_hw_disabled);
- else
+ } else
static_key_slow_inc(&apic_hw_disabled.key);
recalculate_apic_map(vcpu->kvm);
}
@@ -1716,8 +1801,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
- if (!init_event)
- kvm_apic_set_id(apic, vcpu->vcpu_id);
+ if (!init_event) {
+ kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE);
+ kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
+ }
kvm_apic_set_version(apic->vcpu);
for (i = 0; i < KVM_APIC_LVT_NUM; i++)
@@ -1856,9 +1944,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
* thinking that APIC satet has changed.
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
- kvm_lapic_set_base(vcpu,
- APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
-
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -1938,17 +2023,48 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
return vector;
}
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
- struct kvm_lapic_state *s)
+static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+ struct kvm_lapic_state *s, bool set)
+{
+ if (apic_x2apic_mode(vcpu->arch.apic)) {
+ u32 *id = (u32 *)(s->regs + APIC_ID);
+
+ if (vcpu->kvm->arch.x2apic_format) {
+ if (*id != vcpu->vcpu_id)
+ return -EINVAL;
+ } else {
+ if (set)
+ *id >>= 24;
+ else
+ *id <<= 24;
+ }
+ }
+
+ return 0;
+}
+
+int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
+{
+ memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
+ return kvm_apic_state_fixup(vcpu, s, false);
+}
+
+int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ int r;
+
kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
/* set SPIV separately to get count of SW disabled APICs right */
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+
+ r = kvm_apic_state_fixup(vcpu, s, true);
+ if (r)
+ return r;
memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
- /* call kvm_apic_set_id() to put apic into apic_map */
- kvm_apic_set_id(apic, kvm_apic_id(apic));
+
+ recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
@@ -1974,6 +2090,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
kvm_rtc_eoi_tracking_restore_one(vcpu);
vcpu->arch.apic_arb_prio = 0;
+
+ return 0;
}
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 891c6da7d4aa..f60d01c29d51 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -20,6 +20,7 @@ struct kvm_timer {
u64 tscdeadline;
u64 expired_tscdeadline;
atomic_t pending; /* accumulated triggered timers */
+ bool hv_timer_in_use;
};
struct kvm_lapic {
@@ -80,8 +81,8 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
- struct kvm_lapic_state *s);
+int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
+int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -199,9 +200,15 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
-static inline int kvm_apic_id(struct kvm_lapic *apic)
+static inline u32 kvm_apic_id(struct kvm_lapic *apic)
{
- return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+ /* To avoid a race between apic_base and following APIC_ID update when
+ * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
+ */
+ if (apic_x2apic_mode(apic))
+ return apic->vcpu->vcpu_id;
+
+ return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
@@ -212,4 +219,8 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
const unsigned long *bitmap, u32 bitmap_size);
+void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
+void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index def97b3a392b..3d4cc8cc56a3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -29,7 +29,8 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/export.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/compiler.h>
@@ -175,6 +176,7 @@ static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_present_mask;
static void mmu_spte_set(u64 *sptep, u64 spte);
static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -282,13 +284,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
}
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask)
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
+ shadow_present_mask = p_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@@ -304,7 +307,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
static int is_shadow_present_pte(u64 pte)
{
- return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
+ return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte);
}
static int is_large_pte(u64 pte)
@@ -523,7 +526,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
}
/* Rules for using mmu_spte_update:
- * Update the state bits, it means the mapped pfn is not changged.
+ * Update the state bits, it means the mapped pfn is not changed.
*
* Whenever we overwrite a writable spte with a read-only one we
* should flush remote TLBs. Otherwise rmap_write_protect
@@ -2245,10 +2248,9 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
{
u64 spte;
- BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK ||
- VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
+ BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
- spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
+ spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
mmu_spte_set(sptep, spte);
@@ -2515,13 +2517,19 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
bool can_unsync, bool host_writable)
{
- u64 spte;
+ u64 spte = 0;
int ret = 0;
if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
return 0;
- spte = PT_PRESENT_MASK;
+ /*
+ * For the EPT case, shadow_present_mask is 0 if hardware
+ * supports exec-only page table entries. In that case,
+ * ACC_USER_MASK and shadow_user_mask are used to represent
+ * read access. See FNAME(gpte_access) in paging_tmpl.h.
+ */
+ spte |= shadow_present_mask;
if (!speculative)
spte |= shadow_accessed_mask;
@@ -3189,7 +3197,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
MMU_WARN_ON(VALID_PAGE(root));
if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
- if (!is_present_gpte(pdptr)) {
+ if (!(pdptr & PT_PRESENT_MASK)) {
vcpu->arch.mmu.pae_root[i] = 0;
continue;
}
@@ -3914,9 +3922,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
* clearer.
*/
smap = cr4_smap && u && !uf && !ff;
- } else
- /* Not really needed: no U/S accesses on ept */
- u = 1;
+ }
fault = (ff && !x) || (uf && !u) || (wf && !w) ||
(smapf && smap);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 66b33b96a31b..ddc56e91f2e4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -93,11 +93,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
return kvm_mmu_load(vcpu);
}
-static inline int is_present_gpte(unsigned long pte)
-{
- return pte & PT_PRESENT_MASK;
-}
-
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc019f70e0b6..a01105485315 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -131,7 +131,7 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
static inline int FNAME(is_present_gpte)(unsigned long pte)
{
#if PTTYPE != PTTYPE_EPT
- return is_present_gpte(pte);
+ return pte & PT_PRESENT_MASK;
#else
return pte & 7;
#endif
@@ -181,13 +181,19 @@ no_present:
return true;
}
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability since it isn't used in the EPT case
+ */
static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
{
unsigned access;
#if PTTYPE == PTTYPE_EPT
access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
- ACC_USER_MASK;
+ ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
#else
BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index ab38af4f4947..9d4a8504a95a 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -93,7 +93,7 @@ static unsigned intel_find_fixed_event(int idx)
return intel_arch_events[fixed_pmc_events[idx]].event_type;
}
-/* check if a PMC is enabled by comparising it with globl_ctrl bits. */
+/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 16ef31b87452..af523d84d102 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1577,7 +1577,7 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
/*
- * Any change of EFLAGS.VM is accompained by a reload of SS
+ * Any change of EFLAGS.VM is accompanied by a reload of SS
* (caused by either a task switch or an inter-privilege IRET),
* so we do not need to update the CPL here.
*/
@@ -4940,6 +4940,12 @@ out:
static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
local_irq_enable();
+ /*
+ * We must have an instruction with interrupts enabled, so
+ * the timer interrupt isn't delayed by the interrupt shadow.
+ */
+ asm("nop");
+ local_irq_disable();
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 8de925031b5c..0a6cc6754ec5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1348,6 +1348,21 @@ TRACE_EVENT(kvm_avic_unaccelerated_access,
__entry->vec)
);
+TRACE_EVENT(kvm_hv_timer_state,
+ TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use),
+ TP_ARGS(vcpu_id, hv_timer_in_use),
+ TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
+ __field(unsigned int, hv_timer_in_use)
+ ),
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->hv_timer_in_use = hv_timer_in_use;
+ ),
+ TP_printk("vcpu_id %x hv_timer %x\n",
+ __entry->vcpu_id,
+ __entry->hv_timer_in_use)
+);
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index df07a0a4611f..a45d8580f91e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -110,6 +110,13 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
+/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
+static int __read_mostly cpu_preemption_timer_multi;
+static bool __read_mostly enable_preemption_timer = 1;
+#ifdef CONFIG_X86_64
+module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
+#endif
+
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
@@ -398,6 +405,12 @@ struct nested_vmx {
/* The host-usable pointer to the above */
struct page *current_vmcs12_page;
struct vmcs12 *current_vmcs12;
+ /*
+ * Cache of the guest's VMCS, existing outside of guest memory.
+ * Loaded from guest memory during VMPTRLD. Flushed to guest
+ * memory during VMXOFF, VMCLEAR, VMPTRLD.
+ */
+ struct vmcs12 *cached_vmcs12;
struct vmcs *current_shadow_vmcs;
/*
* Indicates if the shadow vmcs must be updated with the
@@ -421,7 +434,6 @@ struct nested_vmx {
struct pi_desc *pi_desc;
bool pi_pending;
u16 posted_intr_nv;
- u64 msr_ia32_feature_control;
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -597,11 +609,22 @@ struct vcpu_vmx {
#define PML_ENTITY_NUM 512
struct page *pml_pg;
+ /* apic deadline value in host tsc */
+ u64 hv_deadline_tsc;
+
u64 current_tsc_ratio;
bool guest_pkru_valid;
u32 guest_pkru;
u32 host_pkru;
+
+ /*
+ * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
+ * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+ * in msr_ia32_feature_control_valid_bits.
+ */
+ u64 msr_ia32_feature_control;
+ u64 msr_ia32_feature_control_valid_bits;
};
enum segment_cache_field {
@@ -841,7 +864,7 @@ static inline short vmcs_field_to_offset(unsigned long field)
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
- return to_vmx(vcpu)->nested.current_vmcs12;
+ return to_vmx(vcpu)->nested.cached_vmcs12;
}
static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
@@ -1056,6 +1079,58 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}
+/*
+ * Comment's format: document - errata name - stepping - processor name.
+ * Refer from
+ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
+ */
+static u32 vmx_preemption_cpu_tfms[] = {
+/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
+0x000206E6,
+/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
+/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
+/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020652,
+/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020655,
+/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
+/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
+/*
+ * 320767.pdf - AAP86 - B1 -
+ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
+ */
+0x000106E5,
+/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
+0x000106A0,
+/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
+0x000106A1,
+/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
+0x000106A4,
+ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
+ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
+ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
+0x000106A5,
+};
+
+static inline bool cpu_has_broken_vmx_preemption_timer(void)
+{
+ u32 eax = cpuid_eax(0x00000001), i;
+
+ /* Clear the reserved bits */
+ eax &= ~(0x3U << 14 | 0xfU << 28);
+ for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
+ if (eax == vmx_preemption_cpu_tfms[i])
+ return true;
+
+ return false;
+}
+
+static inline bool cpu_has_vmx_preemption_timer(void)
+{
+ return vmcs_config.pin_based_exec_ctrl &
+ PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
static inline bool cpu_has_vmx_posted_intr(void)
{
return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
@@ -1603,6 +1678,11 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
__vmcs_writel(field, __vmcs_readl(field) | mask);
}
+static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+ vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
+}
+
static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
{
vmcs_write32(VM_ENTRY_CONTROLS, val);
@@ -1631,6 +1711,11 @@ static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
}
+static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+ vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
+}
+
static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
{
vmcs_write32(VM_EXIT_CONTROLS, val);
@@ -2121,22 +2206,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+ bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
if (!vmm_exclusive)
kvm_cpu_vmxon(phys_addr);
- else if (vmx->loaded_vmcs->cpu != cpu)
+ else if (!already_loaded)
loaded_vmcs_clear(vmx->loaded_vmcs);
- if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
- per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
- vmcs_load(vmx->loaded_vmcs->vmcs);
- }
-
- if (vmx->loaded_vmcs->cpu != cpu) {
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
- unsigned long sysenter_esp;
-
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ if (!already_loaded) {
local_irq_disable();
crash_disable_local_vmclear(cpu);
@@ -2151,6 +2228,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
&per_cpu(loaded_vmcss_on_cpu, cpu));
crash_enable_local_vmclear(cpu);
local_irq_enable();
+ }
+
+ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+ vmcs_load(vmx->loaded_vmcs->vmcs);
+ }
+
+ if (!already_loaded) {
+ struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+ unsigned long sysenter_esp;
+
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
/*
* Linux uses per-cpu TSS and GDT, so set these when switching
@@ -2716,13 +2805,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_INVEPT_BIT;
+ if (cpu_has_vmx_ept_execute_only())
+ vmx->nested.nested_vmx_ept_caps |=
+ VMX_EPT_EXECUTE_ONLY_BIT;
vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
- /*
- * For nested guests, we don't do anything specific
- * for single context invalidation. Hence, only advertise
- * support for global context invalidation.
- */
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+ VMX_EPT_EXTENT_CONTEXT_BIT;
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -2853,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
vmx->nested.nested_vmx_secondary_ctls_high);
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
- /* Currently, no nested vpid support */
*pdata = vmx->nested.nested_vmx_ept_caps |
((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
@@ -2864,6 +2951,14 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
return 0;
}
+static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
+ uint64_t val)
+{
+ uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
+
+ return !(val & ~valid_bits);
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -2905,10 +3000,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
- case MSR_IA32_FEATURE_CONTROL:
- if (!nested_vmx_allowed(vcpu))
+ case MSR_IA32_MCG_EXT_CTL:
+ if (!msr_info->host_initiated &&
+ !(to_vmx(vcpu)->msr_ia32_feature_control &
+ FEATURE_CONTROL_LMCE))
return 1;
- msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+ msr_info->data = vcpu->arch.mcg_ext_ctl;
+ break;
+ case MSR_IA32_FEATURE_CONTROL:
+ msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -2998,12 +3098,20 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC_ADJUST:
ret = kvm_set_msr_common(vcpu, msr_info);
break;
+ case MSR_IA32_MCG_EXT_CTL:
+ if ((!msr_info->host_initiated &&
+ !(to_vmx(vcpu)->msr_ia32_feature_control &
+ FEATURE_CONTROL_LMCE)) ||
+ (data & ~MCG_EXT_CTL_LMCE_EN))
+ return 1;
+ vcpu->arch.mcg_ext_ctl = data;
+ break;
case MSR_IA32_FEATURE_CONTROL:
- if (!nested_vmx_allowed(vcpu) ||
- (to_vmx(vcpu)->nested.msr_ia32_feature_control &
+ if (!vmx_feature_control_msr_valid(vcpu, data) ||
+ (to_vmx(vcpu)->msr_ia32_feature_control &
FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
return 1;
- vmx->nested.msr_ia32_feature_control = data;
+ vmx->msr_ia32_feature_control = data;
if (msr_info->host_initiated && data == 0)
vmx_leave_nested(vcpu);
break;
@@ -3297,25 +3405,27 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmx_capability.ept, vmx_capability.vpid);
}
- min = VM_EXIT_SAVE_DEBUG_CONTROLS;
+ min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
- VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
+ VM_EXIT_CLEAR_BNDCFGS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
+ if (cpu_has_broken_vmx_preemption_timer())
+ _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
if (!(_cpu_based_2nd_exec_control &
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
- !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -3364,7 +3474,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
/*
* Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
- * but due to arrata below it can't be used. Workaround is to use
+ * but due to errata below it can't be used. Workaround is to use
* msr load mechanism to switch IA32_PERF_GLOBAL_CTRL.
*
* VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
@@ -4781,6 +4891,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+ /* Enable the preemption timer dynamically */
+ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
}
@@ -4896,6 +5008,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
/* Control */
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ vmx->hv_deadline_tsc = -1;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
@@ -6016,12 +6129,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
trace_kvm_page_fault(gpa, exit_qualification);
- /* It is a write fault? */
- error_code = exit_qualification & PFERR_WRITE_MASK;
+ /* it is a read fault? */
+ error_code = (exit_qualification << 2) & PFERR_USER_MASK;
+ /* it is a write fault? */
+ error_code |= exit_qualification & PFERR_WRITE_MASK;
/* It is a fetch fault? */
error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
/* ept page table is present? */
- error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
+ error_code |= (exit_qualification & 0x38) != 0;
vcpu->arch.exit_qualification = exit_qualification;
@@ -6355,9 +6470,6 @@ static __init int hardware_setup(void)
for (msr = 0x800; msr <= 0x8ff; msr++)
vmx_disable_intercept_msr_read_x2apic(msr);
- /* According SDM, in x2apic mode, the whole id reg is used. But in
- * KVM, it only use the highest eight bits. Need to intercept it */
- vmx_enable_intercept_msr_read_x2apic(0x802);
/* TMCCT */
vmx_enable_intercept_msr_read_x2apic(0x839);
/* TPR */
@@ -6368,10 +6480,12 @@ static __init int hardware_setup(void)
vmx_disable_intercept_msr_write_x2apic(0x83f);
if (enable_ept) {
- kvm_mmu_set_mask_ptes(0ull,
+ kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
- 0ull, VMX_EPT_EXECUTABLE_MASK);
+ 0ull, VMX_EPT_EXECUTABLE_MASK,
+ cpu_has_vmx_ept_execute_only() ?
+ 0ull : VMX_EPT_READABLE_MASK);
ept_set_mmio_spte_mask();
kvm_enable_tdp();
} else
@@ -6393,8 +6507,21 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
+ if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
+ u64 vmx_msr;
+
+ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+ cpu_preemption_timer_multi =
+ vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ } else {
+ kvm_x86_ops->set_hv_timer = NULL;
+ kvm_x86_ops->cancel_hv_timer = NULL;
+ }
+
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+ kvm_mce_cap_supported |= MCG_LMCE_P;
+
return alloc_kvm_area();
out8:
@@ -6862,16 +6989,22 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
+ if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
!= VMXON_NEEDED_FEATURES) {
kvm_inject_gp(vcpu, 0);
return 1;
}
+ vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+ if (!vmx->nested.cached_vmcs12)
+ return -ENOMEM;
+
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
- if (!shadow_vmcs)
+ if (!shadow_vmcs) {
+ kfree(vmx->nested.cached_vmcs12);
return -ENOMEM;
+ }
/* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
@@ -6942,6 +7075,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
}
vmx->nested.posted_intr_nv = -1;
+
+ /* Flush VMCS12 to guest memory */
+ memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
+ VMCS12_SIZE);
+
kunmap(vmx->nested.current_vmcs12_page);
nested_release_page(vmx->nested.current_vmcs12_page);
vmx->nested.current_vmptr = -1ull;
@@ -6962,6 +7100,7 @@ static void free_nested(struct vcpu_vmx *vmx)
nested_release_vmcs12(vmx);
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
+ kfree(vmx->nested.cached_vmcs12);
/* Unpin physical memory we referred to in current vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
@@ -7365,6 +7504,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
vmx->nested.current_vmptr = vmptr;
vmx->nested.current_vmcs12 = new_vmcs12;
vmx->nested.current_vmcs12_page = page;
+ /*
+ * Load VMCS12 from guest memory since it is not already
+ * cached.
+ */
+ memcpy(vmx->nested.cached_vmcs12,
+ vmx->nested.current_vmcs12, VMCS12_SIZE);
+
if (enable_shadow_vmcs) {
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_SHADOW_VMCS);
@@ -7458,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
+ /*
+ * TODO: track mappings and invalidate
+ * single context requests appropriately
+ */
+ case VMX_EPT_EXTENT_CONTEXT:
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
- /* Trap single context invalidation invept calls */
BUG_ON(1);
break;
}
@@ -7560,6 +7710,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ kvm_lapic_expired_hv_timer(vcpu);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7610,6 +7766,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
+ [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
};
static const int kvm_vmx_max_exit_handlers =
@@ -7918,6 +8075,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+ case EXIT_REASON_PREEMPTION_TIMER:
+ return false;
default:
return true;
}
@@ -8303,7 +8462,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
* the next L2->L1 exit.
*/
if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(vmx->nested.current_vmcs12,
+ !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
vmcs_write64(APIC_ACCESS_ADDR, hpa);
}
@@ -8436,7 +8595,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
"push %[sp]\n\t"
#endif
"pushf\n\t"
- "orl $0x200, (%%" _ASM_SP ")\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
"call *%[entry]\n\t"
:
@@ -8449,8 +8607,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
- } else
- local_irq_enable();
+ }
}
static bool vmx_has_high_real_mode_segbase(void)
@@ -8601,6 +8758,26 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host);
}
+void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u64 tscl;
+ u32 delta_tsc;
+
+ if (vmx->hv_deadline_tsc == -1)
+ return;
+
+ tscl = rdtsc();
+ if (vmx->hv_deadline_tsc > tscl)
+ /* sure to be 32 bit only because checked on set_hv_timer */
+ delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
+ cpu_preemption_timer_multi);
+ else
+ delta_tsc = 0;
+
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+}
+
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8650,6 +8827,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
+ vmx_arm_hv_timer(vcpu);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -8940,6 +9119,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
+ vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+
return &vmx->vcpu;
free_vmcs:
@@ -9080,6 +9261,13 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls())
vmcs_set_secondary_exec_control(secondary_exec_ctl);
+
+ if (nested_vmx_allowed(vcpu))
+ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+ FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ else
+ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+ ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9636,9 +9824,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
exec_control = vmcs12->pin_based_vm_exec_control;
- exec_control |= vmcs_config.pin_based_exec_ctrl;
+
+ /* Preemption timer setting is only taken from vmcs01. */
exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ exec_control |= vmcs_config.pin_based_exec_ctrl;
+ if (vmx->hv_deadline_tsc == -1)
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ /* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) {
/*
* Note that we use L0's vector here and in
@@ -10556,8 +10749,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs12->vm_exit_intr_error_code,
KVM_ISA_VMX);
- vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
- vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
+ vm_entry_controls_reset_shadow(vmx);
+ vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
/* if no vmcs02 cache requested, remove the one we used */
@@ -10566,8 +10759,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
- /* Update TSC_OFFSET if TSC was changed while L2 ran */
+ /* Update any VMCS fields that might have changed while L2 ran */
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ if (vmx->hv_deadline_tsc == -1)
+ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ else
+ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
/* This is needed for same reason as it was needed in prepare_vmcs02 */
vmx->host_rsp = 0;
@@ -10647,6 +10846,64 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
return X86EMUL_CONTINUE;
}
+#ifdef CONFIG_X86_64
+/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
+static inline int u64_shl_div_u64(u64 a, unsigned int shift,
+ u64 divisor, u64 *result)
+{
+ u64 low = a << shift, high = a >> (64 - shift);
+
+ /* To avoid the overflow on divq */
+ if (high >= divisor)
+ return 1;
+
+ /* Low hold the result, high hold rem which is discarded */
+ asm("divq %2\n\t" : "=a" (low), "=d" (high) :
+ "rm" (divisor), "0" (low), "1" (high));
+ *result = low;
+
+ return 0;
+}
+
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u64 tscl = rdtsc();
+ u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+ u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+
+ /* Convert to host delta tsc if tsc scaling is enabled */
+ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
+ u64_shl_div_u64(delta_tsc,
+ kvm_tsc_scaling_ratio_frac_bits,
+ vcpu->arch.tsc_scaling_ratio,
+ &delta_tsc))
+ return -ERANGE;
+
+ /*
+ * If the delta tsc can't fit in the 32 bit after the multi shift,
+ * we can't use the preemption timer.
+ * It's possible that it fits on later vmentries, but checking
+ * on every vmentry is costly so we just use an hrtimer.
+ */
+ if (delta_tsc >> (cpu_preemption_timer_multi + 32))
+ return -ERANGE;
+
+ vmx->hv_deadline_tsc = tscl + delta_tsc;
+ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ return 0;
+}
+
+static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ vmx->hv_deadline_tsc = -1;
+ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+}
+#endif
+
static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
if (ple_gap)
@@ -10691,7 +10948,7 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
* this case, return 1, otherwise, return 0.
*
*/
-static int vmx_pre_block(struct kvm_vcpu *vcpu)
+static int pi_pre_block(struct kvm_vcpu *vcpu)
{
unsigned long flags;
unsigned int dest;
@@ -10758,7 +11015,18 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
return 0;
}
-static void vmx_post_block(struct kvm_vcpu *vcpu)
+static int vmx_pre_block(struct kvm_vcpu *vcpu)
+{
+ if (pi_pre_block(vcpu))
+ return 1;
+
+ if (kvm_lapic_hv_timer_in_use(vcpu))
+ kvm_lapic_switch_to_sw_timer(vcpu);
+
+ return 0;
+}
+
+static void pi_post_block(struct kvm_vcpu *vcpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct pi_desc old, new;
@@ -10800,6 +11068,14 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
}
}
+static void vmx_post_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->set_hv_timer)
+ kvm_lapic_switch_to_hv_timer(vcpu);
+
+ pi_post_block(vcpu);
+}
+
/*
* vmx_update_pi_irte - set IRTE for Posted-Interrupts
*
@@ -10844,7 +11120,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* We will support full lowest-priority interrupt later.
*/
- kvm_set_msi_irq(e, &irq);
+ kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
/*
* Make sure the IRTE is in remapped mode if
@@ -10889,6 +11165,16 @@ out:
return ret;
}
+static void vmx_setup_mce(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.mcg_cap & MCG_LMCE_P)
+ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+ FEATURE_CONTROL_LMCE;
+ else
+ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+ ~FEATURE_CONTROL_LMCE;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -11013,6 +11299,13 @@ static struct kvm_x86_ops vmx_x86_ops = {
.pmu_ops = &intel_pmu_ops,
.update_pi_irte = vmx_update_pi_irte,
+
+#ifdef CONFIG_X86_64
+ .set_hv_timer = vmx_set_hv_timer,
+ .cancel_hv_timer = vmx_cancel_hv_timer,
+#endif
+
+ .setup_mce = vmx_setup_mce,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 45608a7da9b3..19f9f9e05c2a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,7 +36,8 @@
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
@@ -70,7 +71,8 @@
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
+EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
#define emul_to_vcpu(ctxt) \
container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
@@ -89,8 +91,12 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
+ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
+static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
struct kvm_x86_ops *kvm_x86_ops __read_mostly;
@@ -113,7 +119,8 @@ u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
u64 __read_mostly kvm_max_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
-static u64 __read_mostly kvm_default_tsc_scaling_ratio;
+u64 __read_mostly kvm_default_tsc_scaling_ratio;
+EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 __read_mostly tsc_tolerance_ppm = 250;
@@ -537,7 +544,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
goto out;
}
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
- if (is_present_gpte(pdpte[i]) &&
+ if ((pdpte[i] & PT_PRESENT_MASK) &&
(pdpte[i] &
vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
ret = 0;
@@ -982,6 +989,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
+ MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
};
@@ -1161,7 +1169,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
int version;
int r;
struct pvclock_wall_clock wc;
- struct timespec boot;
+ struct timespec64 boot;
if (!wall_clock)
return;
@@ -1184,13 +1192,13 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
* wall clock specified here. guest system time equals host
* system time for us, thus we must fill in host boot time here.
*/
- getboottime(&boot);
+ getboottime64(&boot);
if (kvm->arch.kvmclock_offset) {
- struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
- boot = timespec_sub(boot, ts);
+ struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
+ boot = timespec64_sub(boot, ts);
}
- wc.sec = boot.tv_sec;
+ wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
wc.nsec = boot.tv_nsec;
wc.version = version;
@@ -2615,6 +2623,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
break;
+ case KVM_CAP_X2APIC_API:
+ r = KVM_X2APIC_API_VALID_FLAGS;
+ break;
default:
r = 0;
break;
@@ -2677,11 +2688,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
break;
}
case KVM_X86_GET_MCE_CAP_SUPPORTED: {
- u64 mce_cap;
-
- mce_cap = KVM_MCE_CAP_SUPPORTED;
r = -EFAULT;
- if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+ if (copy_to_user(argp, &kvm_mce_cap_supported,
+ sizeof(kvm_mce_cap_supported)))
goto out;
r = 0;
break;
@@ -2733,6 +2742,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
rdtsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
+
+ if (kvm_lapic_hv_timer_in_use(vcpu) &&
+ kvm_x86_ops->set_hv_timer(vcpu,
+ kvm_get_lapic_tscdeadline_msr(vcpu)))
+ kvm_lapic_switch_to_sw_timer(vcpu);
if (check_tsc_unstable()) {
u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
@@ -2766,15 +2780,17 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
- memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
-
- return 0;
+ return kvm_apic_get_state(vcpu, s);
}
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- kvm_apic_post_state_restore(vcpu, s);
+ int r;
+
+ r = kvm_apic_set_state(vcpu, s);
+ if (r)
+ return r;
update_cr8_intercept(vcpu);
return 0;
@@ -2859,7 +2875,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
r = -EINVAL;
if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
goto out;
- if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+ if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
goto out;
r = 0;
vcpu->arch.mcg_cap = mcg_cap;
@@ -2869,6 +2885,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
/* Init IA32_MCi_CTL to all 1s */
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+
+ if (kvm_x86_ops->setup_mce)
+ kvm_x86_ops->setup_mce(vcpu);
out:
return r;
}
@@ -3767,7 +3786,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EEXIST;
if (irqchip_in_kernel(kvm))
goto split_irqchip_unlock;
- if (atomic_read(&kvm->online_vcpus))
+ if (kvm->created_vcpus)
goto split_irqchip_unlock;
r = kvm_setup_empty_irq_routing(kvm);
if (r)
@@ -3781,6 +3800,18 @@ split_irqchip_unlock:
mutex_unlock(&kvm->lock);
break;
}
+ case KVM_CAP_X2APIC_API:
+ r = -EINVAL;
+ if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
+ break;
+
+ if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
+ kvm->arch.x2apic_format = true;
+ if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+ kvm->arch.x2apic_broadcast_quirk_disabled = true;
+
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -3832,7 +3863,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (kvm->arch.vpic)
goto create_irqchip_unlock;
r = -EINVAL;
- if (atomic_read(&kvm->online_vcpus))
+ if (kvm->created_vcpus)
goto create_irqchip_unlock;
r = -ENOMEM;
vpic = kvm_create_pic(kvm);
@@ -3872,7 +3903,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
sizeof(struct kvm_pit_config)))
goto out;
create_pit:
- mutex_lock(&kvm->slots_lock);
+ mutex_lock(&kvm->lock);
r = -EEXIST;
if (kvm->arch.vpit)
goto create_pit_unlock;
@@ -3881,7 +3912,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (kvm->arch.vpit)
r = 0;
create_pit_unlock:
- mutex_unlock(&kvm->slots_lock);
+ mutex_unlock(&kvm->lock);
break;
case KVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
@@ -3988,7 +4019,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_SET_BOOT_CPU_ID:
r = 0;
mutex_lock(&kvm->lock);
- if (atomic_read(&kvm->online_vcpus) != 0)
+ if (kvm->created_vcpus)
r = -EBUSY;
else
kvm->arch.bsp_vcpu_id = arg;
@@ -5296,13 +5327,8 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
/* This is a good place to trace that we are exiting SMM. */
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
- if (unlikely(vcpu->arch.smi_pending)) {
- kvm_make_request(KVM_REQ_SMI, vcpu);
- vcpu->arch.smi_pending = 0;
- } else {
- /* Process a latched INIT, if any. */
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- }
+ /* Process a latched INIT or SMI, if any. */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
kvm_mmu_reset_context(vcpu);
@@ -5848,8 +5874,8 @@ int kvm_arch_init(void *opaque)
kvm_x86_ops = ops;
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0);
-
+ PT_DIRTY_MASK, PT64_NX_MASK, 0,
+ PT_PRESENT_MASK);
kvm_timer_init();
perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -6083,7 +6109,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}
/* try to inject new event if pending */
- if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+ if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+ vcpu->arch.smi_pending = false;
+ enter_smm(vcpu);
+ } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
--vcpu->arch.nmi_pending;
vcpu->arch.nmi_injected = true;
kvm_x86_ops->set_nmi(vcpu);
@@ -6106,6 +6135,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
kvm_x86_ops->set_irq(vcpu);
}
}
+
return 0;
}
@@ -6129,7 +6159,7 @@ static void process_nmi(struct kvm_vcpu *vcpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
-static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
u32 flags = 0;
flags |= seg->g << 23;
@@ -6143,7 +6173,7 @@ static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
return flags;
}
-static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
{
struct kvm_segment seg;
int offset;
@@ -6158,11 +6188,11 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
put_smstate(u32, buf, offset + 8, seg.base);
put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+ put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
}
#ifdef CONFIG_X86_64
-static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
{
struct kvm_segment seg;
int offset;
@@ -6171,7 +6201,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
kvm_get_segment(vcpu, &seg, n);
offset = 0x7e00 + n * 16;
- flags = process_smi_get_segment_flags(&seg) >> 8;
+ flags = enter_smm_get_segment_flags(&seg) >> 8;
put_smstate(u16, buf, offset, seg.selector);
put_smstate(u16, buf, offset + 2, flags);
put_smstate(u32, buf, offset + 4, seg.limit);
@@ -6179,7 +6209,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
}
#endif
-static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
{
struct desc_ptr dt;
struct kvm_segment seg;
@@ -6203,13 +6233,13 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7fc4, seg.selector);
put_smstate(u32, buf, 0x7f64, seg.base);
put_smstate(u32, buf, 0x7f60, seg.limit);
- put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+ put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
put_smstate(u32, buf, 0x7fc0, seg.selector);
put_smstate(u32, buf, 0x7f80, seg.base);
put_smstate(u32, buf, 0x7f7c, seg.limit);
- put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+ put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
kvm_x86_ops->get_gdt(vcpu, &dt);
put_smstate(u32, buf, 0x7f74, dt.address);
@@ -6220,7 +6250,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7f54, dt.size);
for (i = 0; i < 6; i++)
- process_smi_save_seg_32(vcpu, buf, i);
+ enter_smm_save_seg_32(vcpu, buf, i);
put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
@@ -6229,7 +6259,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
-static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
{
#ifdef CONFIG_X86_64
struct desc_ptr dt;
@@ -6261,7 +6291,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
put_smstate(u16, buf, 0x7e90, seg.selector);
- put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+ put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
put_smstate(u32, buf, 0x7e94, seg.limit);
put_smstate(u64, buf, 0x7e98, seg.base);
@@ -6271,7 +6301,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
put_smstate(u16, buf, 0x7e70, seg.selector);
- put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+ put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
put_smstate(u32, buf, 0x7e74, seg.limit);
put_smstate(u64, buf, 0x7e78, seg.base);
@@ -6280,31 +6310,26 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u64, buf, 0x7e68, dt.address);
for (i = 0; i < 6; i++)
- process_smi_save_seg_64(vcpu, buf, i);
+ enter_smm_save_seg_64(vcpu, buf, i);
#else
WARN_ON_ONCE(1);
#endif
}
-static void process_smi(struct kvm_vcpu *vcpu)
+static void enter_smm(struct kvm_vcpu *vcpu)
{
struct kvm_segment cs, ds;
struct desc_ptr dt;
char buf[512];
u32 cr0;
- if (is_smm(vcpu)) {
- vcpu->arch.smi_pending = true;
- return;
- }
-
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
vcpu->arch.hflags |= HF_SMM_MASK;
memset(buf, 0, 512);
if (guest_cpuid_has_longmode(vcpu))
- process_smi_save_state_64(vcpu, buf);
+ enter_smm_save_state_64(vcpu, buf);
else
- process_smi_save_state_32(vcpu, buf);
+ enter_smm_save_state_32(vcpu, buf);
kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
@@ -6360,6 +6385,12 @@ static void process_smi(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
+static void process_smi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.smi_pending = true;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
@@ -6554,8 +6585,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (inject_pending_event(vcpu, req_int_win) != 0)
req_immediate_exit = true;
- /* enable NMI/IRQ window open exits if needed */
else {
+ /* Enable NMI/IRQ window open exits if needed.
+ *
+ * SMIs have two cases: 1) they can be nested, and
+ * then there is nothing to do here because RSM will
+ * cause a vmexit anyway; 2) or the SMI can be pending
+ * because inject_pending_event has completed the
+ * injection of an IRQ or NMI from the previous vmexit,
+ * and then we request an immediate exit to inject the SMI.
+ */
+ if (vcpu->arch.smi_pending && !is_smm(vcpu))
+ req_immediate_exit = true;
if (vcpu->arch.nmi_pending)
kvm_x86_ops->enable_nmi_window(vcpu);
if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
@@ -6606,12 +6647,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_load_guest_xcr0(vcpu);
- if (req_immediate_exit)
+ if (req_immediate_exit) {
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
smp_send_reschedule(vcpu->cpu);
+ }
trace_kvm_entry(vcpu->vcpu_id);
wait_lapic_expire(vcpu);
- __kvm_guest_enter();
+ guest_enter_irqoff();
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -6662,16 +6705,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
++vcpu->stat.exits;
- /*
- * We must have an instruction between local_irq_enable() and
- * kvm_guest_exit(), so the timer interrupt isn't delayed by
- * the interrupt shadow. The stat.exits increment will do nicely.
- * But we need to prevent reordering, hence this barrier():
- */
- barrier();
-
- kvm_guest_exit();
+ guest_exit_irqoff();
+ local_irq_enable();
preempt_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -7408,6 +7444,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
vcpu->arch.hflags = 0;
+ vcpu->arch.smi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
@@ -7600,11 +7637,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
- return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu);
-}
-
struct static_key kvm_no_apic_vcpu __read_mostly;
EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
@@ -7871,7 +7903,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
- kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+ kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kvm_mmu_uninit_vm(kvm);
}
@@ -8379,7 +8411,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
/*
* When producer of consumer is unregistered, we change back to
* remapped mode, so we can re-use the current implementation
- * when the irq is masked/disabed or the consumer side (KVM
+ * when the irq is masked/disabled or the consumer side (KVM
* int this case doesn't want to receive the interrupts.
*/
ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
index a3c668875038..216a629a4a1a 100644
--- a/arch/x86/lib/cache-smp.c
+++ b/arch/x86/lib/cache-smp.c
@@ -1,5 +1,5 @@
#include <linux/smp.h>
-#include <linux/module.h>
+#include <linux/export.h>
static void __wbinvd(void *dummy)
{
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
index aa417a97511c..d6f848d1211d 100644
--- a/arch/x86/lib/cpu.c
+++ b/arch/x86/lib/cpu.c
@@ -1,4 +1,5 @@
-#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/export.h>
unsigned int x86_family(unsigned int sig)
{
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index 9845371c5c36..9a7fe6a70491 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -6,7 +6,7 @@
*/
#include <linux/compiler.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/checksum.h>
static inline unsigned short from32to16(unsigned a)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index b6fcb9a9ddbc..8bd53589ecfb 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -5,7 +5,7 @@
* Wrappers of assembly checksum functions for x86-64.
*/
#include <asm/checksum.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
#include <asm/smap.h>
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 2f07c291dcc8..073d1f1a620b 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -11,7 +11,7 @@
* we have to worry about.
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 02de3d74d2c5..8a602a1e404a 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -35,6 +35,7 @@ ENDPROC(__sw_hweight32)
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
+ pushq %rdi
pushq %rdx
movq %rdi, %rdx # w -> t
@@ -60,6 +61,7 @@ ENTRY(__sw_hweight64)
shrq $56, %rax # w = w_tmp >> 56
popq %rdx
+ popq %rdi
ret
#else /* CONFIG_X86_32 */
/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index a404b4b75533..cad12634d6bd 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -1,5 +1,5 @@
#include <linux/string.h>
-#include <linux/module.h>
+#include <linux/export.h>
#undef memcpy
#undef memset
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index e5e3ed8dc079..c2311a678332 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -18,7 +18,7 @@
*/
#include <linux/hardirq.h>
#include <linux/string.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/types.h>
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c
index 8d6ef78b5d01..ff29e8d39414 100644
--- a/arch/x86/lib/msr-reg-export.c
+++ b/arch/x86/lib/msr-reg-export.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/msr.h>
EXPORT_SYMBOL(rdmsr_safe_regs);
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 518532e6a3fa..ce68b6a9d7d1 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/preempt.h>
#include <linux/smp.h>
#include <asm/msr.h>
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 004c861b1648..d1dee753b949 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,4 +1,5 @@
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
#include <linux/preempt.h>
#include <asm/msr.h>
#define CREATE_TRACE_POINTS
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index bd59090825db..dc0ad12f80bb 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -11,7 +11,7 @@
*/
#include <linux/string.h>
-#include <linux/module.h>
+#include <linux/export.h>
#ifdef __HAVE_ARCH_STRCPY
char *strcpy(char *dest, const char *src)
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index e342586db6e4..b4908789484e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -5,7 +5,7 @@
*/
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/word-at-a-time.h>
#include <linux/sched.h>
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index b559d9238781..3bc7baf2a711 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -8,7 +8,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/interrupt.h>
#include <asm/uaccess.h>
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 9f760cdcaf40..69873589c0ba 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -5,7 +5,7 @@
* Copyright 1997 Linus Torvalds
* Copyright 2002 Andi Kleen <ak@suse.de>
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
/*
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 2ca15b59fb3f..ba47524f56e8 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
-#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 9a17250bcbe0..ea9c49adaa1f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -14,7 +14,7 @@
#include <linux/debugfs.h>
#include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/seq_file.h>
#include <asm/pgtable.h>
@@ -454,8 +454,4 @@ static int __init pt_dump_init(void)
return 0;
}
-
__initcall(pt_dump_init);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
-MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index a6d739258137..6d18b70ed5a9 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,5 +1,5 @@
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/swap.h> /* for totalram_pages */
#include <linux/bootmem.h>
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fb4c1b42fc7e..620928903be3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -208,7 +208,7 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
* adjust the page_size_mask for small range to go with
* big page size instead small one if nearby are ram too.
*/
-static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
+static void __ref adjust_range_page_size_mask(struct map_range *mr,
int nr_range)
{
int i;
@@ -396,7 +396,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
* This runs before bootmem is initialized and gets pages directly from
* the physical memory. To access them they are temporarily mapped.
*/
-unsigned long __init_refok init_memory_mapping(unsigned long start,
+unsigned long __ref init_memory_mapping(unsigned long start,
unsigned long end)
{
struct map_range mr[NR_RANGE_MR];
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 84df150ee77e..cf8059016ec8 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -5,7 +5,6 @@
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
*/
-#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 53cc2256cf23..14b9dd71d9e8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -27,7 +27,6 @@
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
-#include <linux/module.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 9c0ff045fdd4..ada98b39b8ad 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -18,7 +18,7 @@
#include <asm/iomap.h>
#include <asm/pat.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/highmem.h>
static int is_io_mapping_possible(resource_size_t base, unsigned long size)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index f0894910bdd7..7aaa2635862d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -9,7 +9,6 @@
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b4f2e7e9e907..4515bae36bbe 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -14,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index aec124214d97..c2638a7d2c10 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,5 +1,5 @@
#include <linux/kmemcheck.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mm.h>
#include <asm/page.h>
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index ddb2244b06a1..afc47f5c9531 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -11,7 +11,7 @@
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 0057a7accfb1..bef36622e408 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -24,7 +24,7 @@
#define DEBUG 1
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 968ac028c34e..fb682108f4dc 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -8,7 +8,6 @@
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
-#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/topology.h>
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 47b6436e41c2..6b7ce6279133 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,7 +24,7 @@
#include <linux/bootmem.h>
#include <linux/memblock.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index db00e3e2f3dc..ecb1b69c1651 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,7 +11,6 @@
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/mm.h>
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 2f7702253ccf..de391b7bc19a 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -11,7 +11,6 @@
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched.h>
#include <linux/gfp.h>
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index 9f0614daea85..a235869532bc 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -26,7 +26,6 @@
* Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
*/
-#include <linux/module.h>
#include <linux/ptrace.h> /* struct pt_regs */
#include "pf_in.h"
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index e67ae0e6c59d..9adce776852b 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -8,7 +8,6 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
-#include <linux/module.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index e666cbbb9261..cfc3b9121ce4 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -1,6 +1,6 @@
#include <linux/bootmem.h>
#include <linux/mmdebug.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mm.h>
#include <asm/page.h>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index b1ecff460a46..35fe69529bc1 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -13,7 +13,7 @@
#include <linux/acpi.h>
#include <linux/mmzone.h>
#include <linux/bitmap.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/topology.h>
#include <linux/mm.h>
#include <asm/proto.h>
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5643fd0b1a7d..4dbe65622810 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -4,7 +4,7 @@
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/cpu.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 8196054fedb0..7b6a9d14c8c0 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -133,7 +133,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev)
if (pci_probe & PCI_NOASSIGN_BARS) {
/*
* If the BIOS did not assign the BAR, zero out the
- * resource so the kernel doesn't attmept to assign
+ * resource so the kernel doesn't attempt to assign
* it later on in pci_assign_unassigned_resources
*/
for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 5ceda85b8687..052c1cb76305 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -169,7 +169,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
size_t size,
dma_addr_t *dma_handle,
gfp_t flags,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vaddr;
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c
index 613cac7395c4..b814ca675131 100644
--- a/arch/x86/pci/vmd.c
+++ b/arch/x86/pci/vmd.c
@@ -119,10 +119,11 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
static void vmd_irq_enable(struct irq_data *data)
{
struct vmd_irq *vmdirq = data->chip_data;
+ unsigned long flags;
- raw_spin_lock(&list_lock);
+ raw_spin_lock_irqsave(&list_lock, flags);
list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
- raw_spin_unlock(&list_lock);
+ raw_spin_unlock_irqrestore(&list_lock, flags);
data->chip->irq_unmask(data);
}
@@ -130,12 +131,14 @@ static void vmd_irq_enable(struct irq_data *data)
static void vmd_irq_disable(struct irq_data *data)
{
struct vmd_irq *vmdirq = data->chip_data;
+ unsigned long flags;
data->chip->irq_mask(data);
- raw_spin_lock(&list_lock);
+ raw_spin_lock_irqsave(&list_lock, flags);
list_del_rcu(&vmdirq->node);
- raw_spin_unlock(&list_lock);
+ INIT_LIST_HEAD_RCU(&vmdirq->node);
+ raw_spin_unlock_irqrestore(&list_lock, flags);
}
/*
@@ -166,16 +169,20 @@ static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
* XXX: We can be even smarter selecting the best IRQ once we solve the
* affinity problem.
*/
-static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd)
+static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
{
- int i, best = 0;
+ int i, best = 1;
+ unsigned long flags;
- raw_spin_lock(&list_lock);
+ if (!desc->msi_attrib.is_msix || vmd->msix_count == 1)
+ return &vmd->irqs[0];
+
+ raw_spin_lock_irqsave(&list_lock, flags);
for (i = 1; i < vmd->msix_count; i++)
if (vmd->irqs[i].count < vmd->irqs[best].count)
best = i;
vmd->irqs[best].count++;
- raw_spin_unlock(&list_lock);
+ raw_spin_unlock_irqrestore(&list_lock, flags);
return &vmd->irqs[best];
}
@@ -184,14 +191,15 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
unsigned int virq, irq_hw_number_t hwirq,
msi_alloc_info_t *arg)
{
- struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(arg->desc)->bus);
+ struct msi_desc *desc = arg->desc;
+ struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
if (!vmdirq)
return -ENOMEM;
INIT_LIST_HEAD(&vmdirq->node);
- vmdirq->irq = vmd_next_irq(vmd);
+ vmdirq->irq = vmd_next_irq(vmd, desc);
vmdirq->virq = virq;
irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip,
@@ -203,11 +211,12 @@ static void vmd_msi_free(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq)
{
struct vmd_irq *vmdirq = irq_get_chip_data(virq);
+ unsigned long flags;
/* XXX: Potential optimization to rebalance */
- raw_spin_lock(&list_lock);
+ raw_spin_lock_irqsave(&list_lock, flags);
vmdirq->irq->count--;
- raw_spin_unlock(&list_lock);
+ raw_spin_unlock_irqrestore(&list_lock, flags);
kfree_rcu(vmdirq, rcu);
}
@@ -261,18 +270,18 @@ static struct device *to_vmd_dev(struct device *dev)
static struct dma_map_ops *vmd_dma_ops(struct device *dev)
{
- return to_vmd_dev(dev)->archdata.dma_ops;
+ return get_dma_ops(to_vmd_dev(dev));
}
static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
- gfp_t flag, struct dma_attrs *attrs)
+ gfp_t flag, unsigned long attrs)
{
return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
attrs);
}
static void vmd_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t addr, struct dma_attrs *attrs)
+ dma_addr_t addr, unsigned long attrs)
{
return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
attrs);
@@ -280,7 +289,7 @@ static void vmd_free(struct device *dev, size_t size, void *vaddr,
static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t addr, size_t size,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
size, attrs);
@@ -288,7 +297,7 @@ static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t addr, size_t size,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
addr, size, attrs);
@@ -297,26 +306,26 @@ static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
dir, attrs);
}
static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
}
static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
}
static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
}
@@ -367,7 +376,7 @@ static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
{
struct dma_domain *domain = &vmd->dma_domain;
- if (vmd->dev->dev.archdata.dma_ops)
+ if (get_dma_ops(&vmd->dev->dev))
del_dma_domain(domain);
}
@@ -379,7 +388,7 @@ static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
static void vmd_setup_dma_ops(struct vmd_dev *vmd)
{
- const struct dma_map_ops *source = vmd->dev->dev.archdata.dma_ops;
+ const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev);
struct dma_map_ops *dest = &vmd->dma_ops;
struct dma_domain *domain = &vmd->dma_domain;
@@ -594,7 +603,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
sd->node = pcibus_to_node(vmd->dev->bus);
vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info,
- NULL);
+ x86_vector_domain);
if (!vmd->irq_domain)
return -ENODEV;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 99ddab79215e..3a483cb5ac81 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -9,7 +9,7 @@
* Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
* Stefano Stabellini <stefano.stabellini@eu.citrix.com>
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/acpi.h>
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 701fd5843c87..b27bccd4390f 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -11,11 +11,9 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/irq.h>
-#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/serial_reg.h>
#include <linux/serial_8250.h>
-#include <linux/reboot.h>
#include <asm/ce4100.h>
#include <asm/prom.h>
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
index 524142117296..5fdacb322ceb 100644
--- a/arch/x86/platform/efi/early_printk.c
+++ b/arch/x86/platform/efi/early_printk.c
@@ -44,7 +44,7 @@ early_initcall(early_efi_map_fb);
* In case earlyprintk=efi,keep we have the whole framebuffer mapped already
* so just return the offset efi_fb + start.
*/
-static __init_refok void *early_efi_map(unsigned long start, unsigned long len)
+static __ref void *early_efi_map(unsigned long start, unsigned long len)
{
unsigned long base;
@@ -56,7 +56,7 @@ static __init_refok void *early_efi_map(unsigned long start, unsigned long len)
return early_ioremap(base + start, len);
}
-static __init_refok void early_efi_unmap(void *addr, unsigned long len)
+static __ref void early_efi_unmap(void *addr, unsigned long len)
{
if (!efi_fb)
early_iounmap(addr, len);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 17c8bbd4e2f0..1fbb408e2e72 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,7 +51,6 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
-#include <asm/rtc.h>
#include <asm/uv/uv.h>
static struct efi efi_phys __initdata;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 3e12c44f88a2..677e29e29473 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -24,7 +24,8 @@
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
#include <linux/efi.h>
#include <linux/uaccess.h>
#include <linux/io.h>
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index abbf49c6e9d3..ce119d2ba0d0 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -20,7 +20,7 @@
#include <linux/scatterlist.h>
#include <linux/sfi.h>
#include <linux/irq.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/notifier.h>
#include <asm/setup.h>
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index ee40fcb6e54d..58024862a7eb 100644
--- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/platform_device.h>
+#include <linux/mc146818rtc.h>
#include <asm/intel-mid.h>
#include <asm/intel_mid_vrtc.h>
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 5bc90dd102d4..c901a3423772 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -21,10 +21,9 @@
#include <linux/delay.h>
#include <linux/errno.h>
-#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/pci.h>
@@ -407,7 +406,6 @@ static const struct pci_device_id mid_pwr_pci_ids[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info },
{}
};
-MODULE_DEVICE_TABLE(pci, mid_pwr_pci_ids);
static struct pci_driver mid_pwr_pci_driver = {
.name = "intel_mid_pwr",
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 1555672d436f..051d264fce2e 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -24,7 +24,7 @@
#include <linux/input.h>
#include <linux/platform_device.h>
#include <linux/irq.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/mmc/core.h>
#include <linux/mmc/card.h>
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 27376081ddec..7c3077e58fa0 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -12,7 +12,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/string.h>
diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c
index e7604f62870d..f1aab8cdb33f 100644
--- a/arch/x86/platform/olpc/olpc_ofw.c
+++ b/arch/x86/platform/olpc/olpc_ofw.c
@@ -1,9 +1,12 @@
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/spinlock_types.h>
#include <linux/init.h>
#include <asm/page.h>
#include <asm/setup.h>
#include <asm/io.h>
+#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
#include <asm/pgtable.h>
#include <asm/olpc_ofw.h>
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
index baf16e72e668..fd39301f25ac 100644
--- a/arch/x86/platform/ts5500/ts5500.c
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -23,7 +23,7 @@
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/leds.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/platform_data/gpio-ts5500.h>
#include <linux/platform_data/max197.h>
#include <linux/platform_device.h>
@@ -345,7 +345,3 @@ error:
return err;
}
device_initcall(ts5500_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
-MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver");
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index e1c24631afbb..776c6592136c 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -8,7 +8,7 @@
* Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/irq.h>
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 8dd80050d705..cd5173a2733f 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -24,7 +24,7 @@
#include <linux/kdb.h>
#include <linux/kexec.h>
#include <linux/kgdb.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/slab.h>
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2b5e6a5cf95..f0b5f2d402af 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -37,11 +37,11 @@ unsigned long jump_address_phys;
*/
unsigned long restore_cr3 __visible;
-pgd_t *temp_level4_pgt __visible;
+unsigned long temp_level4_pgt __visible;
unsigned long relocated_restore_code __visible;
-static int set_up_temporary_text_mapping(void)
+static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
pud_t *pud;
@@ -71,7 +71,7 @@ static int set_up_temporary_text_mapping(void)
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+ set_pgd(pgd + pgd_index(restore_jump_address),
__pgd(__pa(pud) | _KERNPG_TABLE));
return 0;
@@ -90,15 +90,16 @@ static int set_up_temporary_mappings(void)
.kernel_mapping = true,
};
unsigned long mstart, mend;
+ pgd_t *pgd;
int result;
int i;
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
+ pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pgd)
return -ENOMEM;
/* Prepare a temporary mapping for the kernel text */
- result = set_up_temporary_text_mapping();
+ result = set_up_temporary_text_mapping(pgd);
if (result)
return result;
@@ -107,13 +108,12 @@ static int set_up_temporary_mappings(void)
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
- result = kernel_ident_mapping_init(&info, temp_level4_pgt,
- mstart, mend);
-
+ result = kernel_ident_mapping_init(&info, pgd, mstart, mend);
if (result)
return result;
}
+ temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
return 0;
}
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 8eee0e9c93f0..ce8da3a0412c 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -72,8 +72,6 @@ ENTRY(restore_image)
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
/* switch to temporary page tables */
- movq $__PAGE_OFFSET, %rcx
- subq %rcx, %rax
movq %rax, %cr3
/* flush TLB */
movq %rbx, %rcx
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 12734a96df47..ac58c1616408 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -8,6 +8,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
targets += purgatory.ro
+KCOV_INSTRUMENT := n
+
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
# sure how to relocate those. Like kexec-tools, use custom flags.
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index c556c5ae8de5..25012abc3409 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -48,7 +48,7 @@ targets += realmode.lds
$(obj)/realmode.lds: $(obj)/pasyms.h
LDFLAGS_realmode.elf := --emit-relocs -T
-CPPFLAGS_realmode.lds += -P -C -I$(obj)
+CPPFLAGS_realmode.lds += -P -C -I$(objtree)/$(obj)
targets += realmode.elf
$(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE
diff --git a/arch/x86/um/delay.c b/arch/x86/um/delay.c
index f3fe1a688f7e..a8fb7ca4822b 100644
--- a/arch/x86/um/delay.c
+++ b/arch/x86/um/delay.c
@@ -7,7 +7,7 @@
* published by the Free Software Foundation.
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <asm/param.h>
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 6c803ca49b5d..d72dec406ccb 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -2,6 +2,9 @@
# Building vDSO images for x86.
#
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
VDSO64-y := y
vdso-install-$(VDSO64-y) += vdso.so
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index c8377fb26cdf..1daff5545c0a 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -1,7 +1,6 @@
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
-#include <linux/module.h>
#include "debugfs.h"
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 69b4b6d29738..8ffb089b19a5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -23,7 +23,7 @@
#include <linux/sched.h>
#include <linux/kprobes.h>
#include <linux/bootmem.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
@@ -34,9 +34,7 @@
#include <linux/edd.h>
#include <linux/frame.h>
-#ifdef CONFIG_KEXEC_CORE
#include <linux/kexec.h>
-#endif
#include <xen/xen.h>
#include <xen/events.h>
@@ -1334,7 +1332,8 @@ static void xen_crash_shutdown(struct pt_regs *regs)
static int
xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- xen_reboot(SHUTDOWN_crash);
+ if (!kexec_crash_loaded())
+ xen_reboot(SHUTDOWN_crash);
return NOTIFY_DONE;
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 67433714b791..7d5afdb417cc 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -43,7 +43,8 @@
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/vmalloc.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index dd2a49a8aacc..37129db76d33 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -60,7 +60,7 @@
*/
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 9586ff32810c..d37a0c7f82cb 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -21,7 +21,7 @@
#include <linux/init.h>
#include <linux/io.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <xen/platform_pci.h>
#include "xen-ops.h"
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e345891450c3..176425233e4d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -4,7 +4,7 @@
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>