summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/eboot.c32
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c5
-rw-r--r--arch/x86/include/asm/acpi.h23
-rw-r--r--arch/x86/include/asm/atomic.h4
-rw-r--r--arch/x86/include/asm/atomic64_64.h4
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/hpet.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h6
-rw-r--r--arch/x86/include/asm/string_64.h5
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event.c39
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_bts.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_cstate.c694
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_pt.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c61
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c34
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/hpet.c29
-rw-r--r--arch/x86/kernel/pci-dma.c5
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/setup.c12
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/tsc.c35
-rw-r--r--arch/x86/kvm/emulate.c10
-rw-r--r--arch/x86/kvm/vmx.c26
-rw-r--r--arch/x86/kvm/x86.c135
-rw-r--r--arch/x86/lib/x86-opcode-map.txt24
-rw-r--r--arch/x86/mm/pageattr.c9
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c9
-rw-r--r--arch/x86/platform/efi/efi.c28
-rw-r--r--arch/x86/um/ldt.c5
40 files changed, 1168 insertions, 218 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ee1b6d346b98..583d539a4197 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -624,7 +624,7 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
static efi_status_t
__gop_query32(struct efi_graphics_output_protocol_32 *gop32,
struct efi_graphics_output_mode_info **info,
- unsigned long *size, u32 *fb_base)
+ unsigned long *size, u64 *fb_base)
{
struct efi_graphics_output_protocol_mode_32 *mode;
efi_status_t status;
@@ -650,7 +650,8 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
unsigned long nr_gops;
u16 width, height;
u32 pixels_per_scan_line;
- u32 fb_base;
+ u32 ext_lfb_base;
+ u64 fb_base;
struct efi_pixel_bitmask pixel_info;
int pixel_format;
efi_status_t status;
@@ -667,6 +668,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u32 h = handles[i];
+ u64 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop32);
@@ -678,7 +680,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query32(gop32, &info, &size, &fb_base);
+ status = __gop_query32(gop32, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -692,6 +694,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
@@ -713,6 +716,13 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
si->lfb_width = width;
si->lfb_height = height;
si->lfb_base = fb_base;
+
+ ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+ if (ext_lfb_base) {
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ si->ext_lfb_base = ext_lfb_base;
+ }
+
si->pages = 1;
setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
@@ -727,7 +737,7 @@ out:
static efi_status_t
__gop_query64(struct efi_graphics_output_protocol_64 *gop64,
struct efi_graphics_output_mode_info **info,
- unsigned long *size, u32 *fb_base)
+ unsigned long *size, u64 *fb_base)
{
struct efi_graphics_output_protocol_mode_64 *mode;
efi_status_t status;
@@ -753,7 +763,8 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
unsigned long nr_gops;
u16 width, height;
u32 pixels_per_scan_line;
- u32 fb_base;
+ u32 ext_lfb_base;
+ u64 fb_base;
struct efi_pixel_bitmask pixel_info;
int pixel_format;
efi_status_t status;
@@ -770,6 +781,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u64 h = handles[i];
+ u64 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop64);
@@ -781,7 +793,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query64(gop64, &info, &size, &fb_base);
+ status = __gop_query64(gop64, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -795,6 +807,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
@@ -816,6 +829,13 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
si->lfb_width = width;
si->lfb_height = height;
si->lfb_base = fb_base;
+
+ ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+ if (ext_lfb_base) {
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ si->ext_lfb_base = ext_lfb_base;
+ }
+
si->pages = 1;
setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2d6b309c8e9a..6236b9ec4b76 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -154,7 +154,7 @@ extra_header_fields:
#else
.quad 0 # ImageBase
#endif
- .long CONFIG_PHYSICAL_ALIGN # SectionAlignment
+ .long 0x20 # SectionAlignment
.long 0x20 # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 80a0e4389c9a..bacaa13acac5 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,6 +554,11 @@ static int __init camellia_aesni_init(void)
{
const char *feature_name;
+ if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ pr_info("AVX or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 3a45668f6dc3..94c18ebfd68c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -32,6 +32,10 @@
#include <asm/mpspec.h>
#include <asm/realmode.h>
+#ifdef CONFIG_ACPI_APEI
+# include <asm/pgtable_types.h>
+#endif
+
#ifdef CONFIG_ACPI
extern int acpi_lapic;
extern int acpi_ioapic;
@@ -147,4 +151,23 @@ extern int x86_acpi_numa_init(void);
#define acpi_unlazy_tlb(x) leave_mm(x)
+#ifdef CONFIG_ACPI_APEI
+static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
+{
+ /*
+ * We currently have no way to look up the EFI memory map
+ * attributes for a region in a consistent way, because the
+ * memmap is discarded after efi_free_boot_services(). So if
+ * you call efi_mem_attributes() during boot and at runtime,
+ * you could theoretically see different attributes.
+ *
+ * Since we are yet to see any x86 platforms that require
+ * anything other than PAGE_KERNEL (some arm64 platforms
+ * require the equivalent of PAGE_KERNEL_NOCACHE), return that
+ * until we know differently.
+ */
+ return PAGE_KERNEL;
+}
+#endif
+
#endif /* _ASM_X86_ACPI_H */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index fb52aa644aab..ae5fb83e6d91 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -24,7 +24,7 @@
*/
static __always_inline int atomic_read(const atomic_t *v)
{
- return ACCESS_ONCE((v)->counter);
+ return READ_ONCE((v)->counter);
}
/**
@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
*/
static __always_inline void atomic_set(atomic_t *v, int i)
{
- v->counter = i;
+ WRITE_ONCE(v->counter, i);
}
/**
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 50e33eff58de..037351022f54 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -18,7 +18,7 @@
*/
static inline long atomic64_read(const atomic64_t *v)
{
- return ACCESS_ONCE((v)->counter);
+ return READ_ONCE((v)->counter);
}
/**
@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
*/
static inline void atomic64_set(atomic64_t *v, long i)
{
- v->counter = i;
+ WRITE_ONCE(v->counter, i);
}
/**
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ae68be92f755..0010c78c4998 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -105,6 +105,7 @@ extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
extern pgd_t * __init efi_call_phys_prolog(void);
extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
+extern void __init efi_print_memmap(void);
extern void __init efi_unmap_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5fa9fb0f8809..cc285ec4b2c1 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,10 +63,10 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
-extern int boot_hpet_disable;
+extern bool boot_hpet_disable;
extern u8 hpet_blockid;
-extern int hpet_force_user;
-extern u8 hpet_msi_disable;
+extern bool hpet_force_user;
+extern bool hpet_msi_disable;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2beee0382088..3a36ee704c30 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1226,10 +1226,8 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index e4661196994e..ff8b9a17dc4b 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
function. */
#define __HAVE_ARCH_MEMCPY 1
+extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#ifndef CONFIG_KMEMCHECK
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-extern void *memcpy(void *to, const void *from, size_t len);
-#else
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
#define memcpy(dst, src, len) \
({ \
size_t __len = (len); \
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c60bb162622..4f2821527014 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2547,7 +2547,9 @@ void __init setup_ioapic_dest(void)
mask = apic->target_cpus();
chip = irq_data_get_irq_chip(idata);
- chip->irq_set_affinity(idata, mask, false);
+ /* Might be lapic_chip for irq 0 */
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(idata, mask, false);
}
}
#endif
@@ -2907,6 +2909,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_data *irq_data;
struct mp_chip_data *data;
struct irq_alloc_info *info = arg;
+ unsigned long flags;
if (!info || nr_irqs > 1)
return -EINVAL;
@@ -2939,11 +2942,14 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
cfg = irqd_cfg(irq_data);
add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+
+ local_irq_save(flags);
if (info->ioapic_entry)
mp_setup_entry(cfg, data, info->ioapic_entry);
mp_register_handler(virq, data->trigger);
if (virq < nr_legacy_irqs())
legacy_pic->mask(virq);
+ local_irq_restore(flags);
apic_printk(APIC_VERBOSE, KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4eb065c6bed2..58031303e304 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
perf_event_intel_uncore_snb.o \
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index be4febc58b94..e38d338a6447 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -157,7 +157,7 @@ struct _cpuid4_info_regs {
struct amd_northbridge *nb;
};
-unsigned short num_cache_leaves;
+static unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
@@ -326,7 +326,7 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb)
*
* @returns: the disabled index if used or negative value if slot free.
*/
-int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
+static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
unsigned int reg = 0;
@@ -403,8 +403,8 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
*
* @return: 0 on success, error status on failure
*/
-int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
- unsigned long index)
+static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
+ unsigned slot, unsigned long index)
{
int ret = 0;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66dd3fe99b82..4562cf070c27 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN)
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
* XXX assumes any ->del() called during a TXN will only be on
* an event added during that same TXN.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN)
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
return;
/*
@@ -1748,11 +1748,22 @@ static inline void x86_pmu_read(struct perf_event *event)
* Start group events scheduling transaction
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
*/
-static void x86_pmu_start_txn(struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */
+
+ cpuc->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
perf_pmu_disable(pmu);
- __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
__this_cpu_write(cpu_hw_events.n_txn, 0);
}
@@ -1763,7 +1774,16 @@ static void x86_pmu_start_txn(struct pmu *pmu)
*/
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
- __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
+ unsigned int txn_flags;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuc->txn_flags;
+ cpuc->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
/*
* Truncate collected array by the number of events added in this
* transaction. See x86_pmu_add() and x86_pmu_*_txn().
@@ -1786,6 +1806,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
int assign[X86_PMC_IDX_MAX];
int n, ret;
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuc->txn_flags = 0;
+ return 0;
+ }
+
n = cpuc->n_events;
if (!x86_pmu_initialized())
@@ -1801,7 +1828,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
- cpuc->group_flag &= ~PERF_EVENT_TXN;
+ cpuc->txn_flags = 0;
perf_pmu_enable(pmu);
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 165be83a7fa4..499f533dd3cc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -196,7 +196,7 @@ struct cpu_hw_events {
int n_excl; /* the number of exclusive events */
- unsigned int group_flag;
+ unsigned int txn_flags;
int is_fake;
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index d1c0f254afbe..2cad71d1b14c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -495,6 +495,19 @@ static int bts_event_init(struct perf_event *event)
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;
+ /*
+ * BTS leaks kernel addresses even when CPL0 tracing is
+ * disabled, so disallow intel_bts driver for unprivileged
+ * users on paranoid systems since it provides trace data
+ * to the user in a zero-copy fashion.
+ *
+ * Note that the default paranoia setting permits unprivileged
+ * users to profile the kernel.
+ */
+ if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
+ !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
ret = x86_reserve_hardware();
if (ret) {
x86_del_exclusive(x86_lbr_exclusive_bts);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
new file mode 100644
index 000000000000..75a38b5a2e26
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
@@ -0,0 +1,694 @@
+/*
+ * perf_event_intel_cstate.c: support cstate residency counters
+ *
+ * Copyright (C) 2015, Intel Corp.
+ * Author: Kan Liang (kan.liang@intel.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ */
+
+/*
+ * This file export cstate related free running (read-only) counters
+ * for perf. These counters may be use simultaneously by other tools,
+ * such as turbostat. However, it still make sense to implement them
+ * in perf. Because we can conveniently collect them together with
+ * other events, and allow to use them from tools without special MSR
+ * access code.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it is not supported by the hardware.
+ *
+ * According to counters' scope and category, two PMUs are registered
+ * with the perf_event core subsystem.
+ * - 'cstate_core': The counter is available for each physical core.
+ * The counters include CORE_C*_RESIDENCY.
+ * - 'cstate_pkg': The counter is available for each physical package.
+ * The counters include PKG_C*_RESIDENCY.
+ *
+ * All of these counters are specified in the Intel® 64 and IA-32
+ * Architectures Software Developer.s Manual Vol3b.
+ *
+ * Model specific counters:
+ * MSR_CORE_C1_RES: CORE C1 Residency Counter
+ * perf code: 0x00
+ * Available model: SLM,AMT
+ * Scope: Core (each processor core has a MSR)
+ * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
+ * perf code: 0x01
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
+ * perf code: 0x02
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
+ * perf code: 0x03
+ * Available model: SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
+ * perf code: 0x00
+ * Available model: SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
+ * perf code: 0x01
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
+ * perf code: 0x02
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
+ * perf code: 0x03
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
+ * perf code: 0x04
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
+ * perf code: 0x05
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
+ * perf code: 0x06
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+struct perf_cstate_msr {
+ u64 msr;
+ struct perf_pmu_events_attr *attr;
+ bool (*test)(int idx);
+};
+
+
+/* cstate_core PMU */
+
+static struct pmu cstate_core_pmu;
+static bool has_cstate_core;
+
+enum perf_cstate_core_id {
+ /*
+ * cstate_core events
+ */
+ PERF_CSTATE_CORE_C1_RES = 0,
+ PERF_CSTATE_CORE_C3_RES,
+ PERF_CSTATE_CORE_C6_RES,
+ PERF_CSTATE_CORE_C7_RES,
+
+ PERF_CSTATE_CORE_EVENT_MAX,
+};
+
+bool test_core(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES ||
+ idx == PERF_CSTATE_CORE_C7_RES)
+ return true;
+ break;
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_CSTATE_CORE_C1_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+
+static struct perf_cstate_msr core_msr[] = {
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
+};
+
+static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group core_events_attr_group = {
+ .name = "events",
+ .attrs = core_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
+static struct attribute *core_format_attrs[] = {
+ &format_attr_core_event.attr,
+ NULL,
+};
+
+static struct attribute_group core_format_attr_group = {
+ .name = "format",
+ .attrs = core_format_attrs,
+};
+
+static cpumask_t cstate_core_cpu_mask;
+static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
+
+static struct attribute *cstate_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = cstate_cpumask_attrs,
+};
+
+static const struct attribute_group *core_attr_groups[] = {
+ &core_events_attr_group,
+ &core_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+/* cstate_core PMU end */
+
+
+/* cstate_pkg PMU */
+
+static struct pmu cstate_pkg_pmu;
+static bool has_cstate_pkg;
+
+enum perf_cstate_pkg_id {
+ /*
+ * cstate_pkg events
+ */
+ PERF_CSTATE_PKG_C2_RES = 0,
+ PERF_CSTATE_PKG_C3_RES,
+ PERF_CSTATE_PKG_C6_RES,
+ PERF_CSTATE_PKG_C7_RES,
+ PERF_CSTATE_PKG_C8_RES,
+ PERF_CSTATE_PKG_C9_RES,
+ PERF_CSTATE_PKG_C10_RES,
+
+ PERF_CSTATE_PKG_EVENT_MAX,
+};
+
+bool test_pkg(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES ||
+ idx == PERF_CSTATE_CORE_C7_RES)
+ return true;
+ break;
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_CSTATE_PKG_C2_RES ||
+ idx == PERF_CSTATE_PKG_C3_RES ||
+ idx == PERF_CSTATE_PKG_C6_RES ||
+ idx == PERF_CSTATE_PKG_C7_RES)
+ return true;
+ break;
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ case 69: /* 22nm Haswell ULT */
+ if (idx == PERF_CSTATE_PKG_C2_RES ||
+ idx == PERF_CSTATE_PKG_C3_RES ||
+ idx == PERF_CSTATE_PKG_C6_RES ||
+ idx == PERF_CSTATE_PKG_C7_RES ||
+ idx == PERF_CSTATE_PKG_C8_RES ||
+ idx == PERF_CSTATE_PKG_C9_RES ||
+ idx == PERF_CSTATE_PKG_C10_RES)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
+
+static struct perf_cstate_msr pkg_msr[] = {
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
+};
+
+static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group pkg_events_attr_group = {
+ .name = "events",
+ .attrs = pkg_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
+static struct attribute *pkg_format_attrs[] = {
+ &format_attr_pkg_event.attr,
+ NULL,
+};
+static struct attribute_group pkg_format_attr_group = {
+ .name = "format",
+ .attrs = pkg_format_attrs,
+};
+
+static cpumask_t cstate_pkg_cpu_mask;
+
+static const struct attribute_group *pkg_attr_groups[] = {
+ &pkg_events_attr_group,
+ &pkg_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+/* cstate_pkg PMU end*/
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (pmu == &cstate_core_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
+ else if (pmu == &cstate_pkg_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+ else
+ return 0;
+}
+
+static int cstate_pmu_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+ int ret = 0;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ if (event->pmu == &cstate_core_pmu) {
+ if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
+ return -EINVAL;
+ if (!core_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = core_msr[cfg].msr;
+ } else if (event->pmu == &cstate_pkg_pmu) {
+ if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+ return -EINVAL;
+ if (!pkg_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = pkg_msr[cfg].msr;
+ } else
+ return -ENOENT;
+
+ /* must be done before validate_group */
+ event->hw.config = cfg;
+ event->hw.idx = -1;
+
+ return ret;
+}
+
+static inline u64 cstate_pmu_read_counter(struct perf_event *event)
+{
+ u64 val;
+
+ rdmsrl(event->hw.event_base, val);
+ return val;
+}
+
+static void cstate_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_raw_count, new_raw_count;
+
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = cstate_pmu_read_counter(event);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ goto again;
+
+ local64_add(new_raw_count - prev_raw_count, &event->count);
+}
+
+static void cstate_pmu_event_start(struct perf_event *event, int mode)
+{
+ local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
+}
+
+static void cstate_pmu_event_stop(struct perf_event *event, int mode)
+{
+ cstate_pmu_event_update(event);
+}
+
+static void cstate_pmu_event_del(struct perf_event *event, int mode)
+{
+ cstate_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int cstate_pmu_event_add(struct perf_event *event, int mode)
+{
+ if (mode & PERF_EF_START)
+ cstate_pmu_event_start(event, mode);
+
+ return 0;
+}
+
+static void cstate_cpu_exit(int cpu)
+{
+ int i, id, target;
+
+ /* cpu exit for cstate core */
+ if (has_cstate_core) {
+ id = topology_core_id(cpu);
+ target = -1;
+
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (id == topology_core_id(i)) {
+ target = i;
+ break;
+ }
+ }
+ if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+ cpumask_set_cpu(target, &cstate_core_cpu_mask);
+ WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
+ if (target >= 0)
+ perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+ }
+
+ /* cpu exit for cstate pkg */
+ if (has_cstate_pkg) {
+ id = topology_physical_package_id(cpu);
+ target = -1;
+
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (id == topology_physical_package_id(i)) {
+ target = i;
+ break;
+ }
+ }
+ if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+ cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+ WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
+ if (target >= 0)
+ perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+ }
+}
+
+static void cstate_cpu_init(int cpu)
+{
+ int i, id;
+
+ /* cpu init for cstate core */
+ if (has_cstate_core) {
+ id = topology_core_id(cpu);
+ for_each_cpu(i, &cstate_core_cpu_mask) {
+ if (id == topology_core_id(i))
+ break;
+ }
+ if (i >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+ }
+
+ /* cpu init for cstate pkg */
+ if (has_cstate_pkg) {
+ id = topology_physical_package_id(cpu);
+ for_each_cpu(i, &cstate_pkg_cpu_mask) {
+ if (id == topology_physical_package_id(i))
+ break;
+ }
+ if (i >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+ }
+}
+
+static int cstate_cpu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ break;
+ case CPU_STARTING:
+ cstate_cpu_init(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DYING:
+ break;
+ case CPU_ONLINE:
+ case CPU_DEAD:
+ break;
+ case CPU_DOWN_PREPARE:
+ cstate_cpu_exit(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * Probe the cstate events and insert the available one into sysfs attrs
+ * Return false if there is no available events.
+ */
+static bool cstate_probe_msr(struct perf_cstate_msr *msr,
+ struct attribute **events_attrs,
+ int max_event_nr)
+{
+ int i, j = 0;
+ u64 val;
+
+ /* Probe the cstate events. */
+ for (i = 0; i < max_event_nr; i++) {
+ if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+ msr[i].attr = NULL;
+ }
+
+ /* List remaining events in the sysfs attrs. */
+ for (i = 0; i < max_event_nr; i++) {
+ if (msr[i].attr)
+ events_attrs[j++] = &msr[i].attr->attr.attr;
+ }
+ events_attrs[j] = NULL;
+
+ return (j > 0) ? true : false;
+}
+
+static int __init cstate_init(void)
+{
+ /* SLM has different MSR for PKG C6 */
+ switch (boot_cpu_data.x86_model) {
+ case 55:
+ case 76:
+ case 77:
+ pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+ }
+
+ if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
+ has_cstate_core = true;
+
+ if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
+ has_cstate_pkg = true;
+
+ return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static void __init cstate_cpumask_init(void)
+{
+ int cpu;
+
+ cpu_notifier_register_begin();
+
+ for_each_online_cpu(cpu)
+ cstate_cpu_init(cpu);
+
+ __perf_cpu_notifier(cstate_cpu_notifier);
+
+ cpu_notifier_register_done();
+}
+
+static struct pmu cstate_core_pmu = {
+ .attr_groups = core_attr_groups,
+ .name = "cstate_core",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add, /* must have */
+ .del = cstate_pmu_event_del, /* must have */
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static struct pmu cstate_pkg_pmu = {
+ .attr_groups = pkg_attr_groups,
+ .name = "cstate_pkg",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add, /* must have */
+ .del = cstate_pmu_event_del, /* must have */
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static void __init cstate_pmus_register(void)
+{
+ int err;
+
+ if (has_cstate_core) {
+ err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
+ if (WARN_ON(err))
+ pr_info("Failed to register PMU %s error %d\n",
+ cstate_core_pmu.name, err);
+ }
+
+ if (has_cstate_pkg) {
+ err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+ if (WARN_ON(err))
+ pr_info("Failed to register PMU %s error %d\n",
+ cstate_pkg_pmu.name, err);
+ }
+}
+
+static int __init cstate_pmu_init(void)
+{
+ int err;
+
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
+ err = cstate_init();
+ if (err)
+ return err;
+
+ cstate_cpumask_init();
+
+ cstate_pmus_register();
+
+ return 0;
+}
+
+device_initcall(cstate_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 84f236ab96b0..5db1c7755548 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -510,10 +510,11 @@ int intel_pmu_drain_bts_buffer(void)
u64 flags;
};
struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
- struct bts_record *at, *top;
+ struct bts_record *at, *base, *top;
struct perf_output_handle handle;
struct perf_event_header header;
struct perf_sample_data data;
+ unsigned long skip = 0;
struct pt_regs regs;
if (!event)
@@ -522,10 +523,10 @@ int intel_pmu_drain_bts_buffer(void)
if (!x86_pmu.bts_active)
return 0;
- at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
- top = (struct bts_record *)(unsigned long)ds->bts_index;
+ base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+ top = (struct bts_record *)(unsigned long)ds->bts_index;
- if (top <= at)
+ if (top <= base)
return 0;
memset(&regs, 0, sizeof(regs));
@@ -535,16 +536,43 @@ int intel_pmu_drain_bts_buffer(void)
perf_sample_data_init(&data, 0, event->hw.last_period);
/*
+ * BTS leaks kernel addresses in branches across the cpl boundary,
+ * such as traps or system calls, so unless the user is asking for
+ * kernel tracing (and right now it's not possible), we'd need to
+ * filter them out. But first we need to count how many of those we
+ * have in the current batch. This is an extra O(n) pass, however,
+ * it's much faster than the other one especially considering that
+ * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
+ * alloc_bts_buffer()).
+ */
+ for (at = base; at < top; at++) {
+ /*
+ * Note that right now *this* BTS code only works if
+ * attr::exclude_kernel is set, but let's keep this extra
+ * check here in case that changes.
+ */
+ if (event->attr.exclude_kernel &&
+ (kernel_ip(at->from) || kernel_ip(at->to)))
+ skip++;
+ }
+
+ /*
* Prepare a generic sample, i.e. fill in the invariant fields.
* We will overwrite the from and to address before we output
* the sample.
*/
perf_prepare_sample(&header, &data, event, &regs);
- if (perf_output_begin(&handle, event, header.size * (top - at)))
+ if (perf_output_begin(&handle, event, header.size *
+ (top - base - skip)))
return 1;
- for (; at < top; at++) {
+ for (at = base; at < top; at++) {
+ /* Filter out any records that contain kernel addresses. */
+ if (event->attr.exclude_kernel &&
+ (kernel_ip(at->from) || kernel_ip(at->to)))
+ continue;
+
data.ip = at->from;
data.addr = at->to;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index b2c9475b7ff2..bfd0b717e944 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -151,10 +151,10 @@ static void __intel_pmu_lbr_enable(bool pmi)
* No need to reprogram LBR_SELECT in a PMI, as it
* did not change.
*/
- if (cpuc->lbr_sel && !pmi) {
+ if (cpuc->lbr_sel)
lbr_select = cpuc->lbr_sel->config;
+ if (!pmi)
wrmsrl(MSR_LBR_SELECT, lbr_select);
- }
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
orig_debugctl = debugctl;
@@ -555,6 +555,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
mask |= X86_BR_IND_JMP;
+ if (br_type & PERF_SAMPLE_BRANCH_CALL)
+ mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
@@ -890,6 +892,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
};
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -905,6 +908,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
| LBR_RETURN | LBR_CALL_STACK,
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
};
/* core */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 42169283448b..868e1194337f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -139,9 +139,6 @@ static int __init pt_pmu_hw_init(void)
long i;
attrs = NULL;
- ret = -ENODEV;
- if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
- goto fail;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
@@ -1130,6 +1127,10 @@ static __init int pt_init(void)
int ret, cpu, prior_warn = 0;
BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+
+ if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+ return -ENODEV;
+
get_online_cpus();
for_each_online_cpu(cpu) {
u64 ctl;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 560e5255b15e..61215a69b03d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
-int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
static DEFINE_RAW_SPINLOCK(uncore_box_lock);
@@ -20,6 +21,59 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
+int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+ struct pci2phy_map *map;
+ int phys_id = -1;
+
+ raw_spin_lock(&pci2phy_map_lock);
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == pci_domain_nr(bus)) {
+ phys_id = map->pbus_to_physid[bus->number];
+ break;
+ }
+ }
+ raw_spin_unlock(&pci2phy_map_lock);
+
+ return phys_id;
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+ struct pci2phy_map *map, *alloc = NULL;
+ int i;
+
+ lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == segment)
+ goto end;
+ }
+
+ if (!alloc) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+ raw_spin_lock(&pci2phy_map_lock);
+
+ if (!alloc)
+ return NULL;
+
+ goto lookup;
+ }
+
+ map = alloc;
+ alloc = NULL;
+ map->segment = segment;
+ for (i = 0; i < 256; i++)
+ map->pbus_to_physid[i] = -1;
+ list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+ kfree(alloc);
+ return map;
+}
+
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -809,7 +863,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
int phys_id;
bool first_box = false;
- phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+ phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
@@ -856,9 +910,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
struct intel_uncore_pmu *pmu;
- int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+ int i, cpu, phys_id;
bool last_box = false;
+ phys_id = uncore_pcibus_to_physid(pdev->bus);
box = pci_get_drvdata(pdev);
if (!box) {
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 72c54c2e5b1a..2f0a4a98e16b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -117,6 +117,15 @@ struct uncore_event_desc {
const char *config;
};
+struct pci2phy_map {
+ struct list_head list;
+ int segment;
+ int pbus_to_physid[256];
+};
+
+int uncore_pcibus_to_physid(struct pci_bus *bus);
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
@@ -317,7 +326,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
extern struct pci_driver *uncore_pci_driver;
-extern int uncore_pcibus_to_physid[256];
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
extern struct event_constraint uncore_constraint_empty;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index f78574b3cb55..845256158a10 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -420,15 +420,25 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
static int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
- int bus;
+ struct pci2phy_map *map;
+ int bus, segment;
dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
if (!dev)
return -ENOTTY;
bus = dev->bus->number;
-
- uncore_pcibus_to_physid[bus] = 0;
+ segment = pci_domain_nr(dev->bus);
+
+ raw_spin_lock(&pci2phy_map_lock);
+ map = __find_pci2phy_map(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ pci_dev_put(dev);
+ return -ENOMEM;
+ }
+ map->pbus_to_physid[bus] = 0;
+ raw_spin_unlock(&pci2phy_map_lock);
pci_dev_put(dev);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 694510a887dc..f0f4fcba252e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1087,7 +1087,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
static int snbep_pci2phy_map_init(int devid)
{
struct pci_dev *ubox_dev = NULL;
- int i, bus, nodeid;
+ int i, bus, nodeid, segment;
+ struct pci2phy_map *map;
int err = 0;
u32 config = 0;
@@ -1106,16 +1107,27 @@ static int snbep_pci2phy_map_init(int devid)
err = pci_read_config_dword(ubox_dev, 0x54, &config);
if (err)
break;
+
+ segment = pci_domain_nr(ubox_dev->bus);
+ raw_spin_lock(&pci2phy_map_lock);
+ map = __find_pci2phy_map(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ err = -ENOMEM;
+ break;
+ }
+
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
*/
for (i = 0; i < 8; i++) {
if (nodeid == ((config >> (3 * i)) & 0x7)) {
- uncore_pcibus_to_physid[bus] = i;
+ map->pbus_to_physid[bus] = i;
break;
}
}
+ raw_spin_unlock(&pci2phy_map_lock);
}
if (!err) {
@@ -1123,13 +1135,17 @@ static int snbep_pci2phy_map_init(int devid)
* For PCI bus with no UBOX device, find the next bus
* that has UBOX device and use its mapping.
*/
- i = -1;
- for (bus = 255; bus >= 0; bus--) {
- if (uncore_pcibus_to_physid[bus] >= 0)
- i = uncore_pcibus_to_physid[bus];
- else
- uncore_pcibus_to_physid[bus] = i;
+ raw_spin_lock(&pci2phy_map_lock);
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ i = -1;
+ for (bus = 255; bus >= 0; bus--) {
+ if (map->pbus_to_physid[bus] >= 0)
+ i = map->pbus_to_physid[bus];
+ else
+ map->pbus_to_physid[bus] = i;
+ }
}
+ raw_spin_unlock(&pci2phy_map_lock);
}
pci_dev_put(ubox_dev);
@@ -2444,7 +2460,7 @@ static struct intel_uncore_type *bdx_pci_uncores[] = {
NULL,
};
-static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = {
+static const struct pci_device_id bdx_uncore_pci_ids[] = {
{ /* Home Agent 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f9cc682e561..db9a675e751b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -584,7 +584,7 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
static void __init force_disable_hpet(int num, int slot, int func)
{
#ifdef CONFIG_HPET_TIMER
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n");
#endif
}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 88b4da373081..b8e6ff5cd5d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -37,10 +37,10 @@
*/
unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
-u8 hpet_msi_disable;
+bool hpet_msi_disable;
#ifdef CONFIG_PCI_MSI
-static unsigned long hpet_num_timers;
+static unsigned int hpet_num_timers;
#endif
static void __iomem *hpet_virt_address;
@@ -86,9 +86,9 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-int boot_hpet_disable;
-int hpet_force_user;
-static int hpet_verbose;
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
static int __init hpet_setup(char *str)
{
@@ -98,11 +98,11 @@ static int __init hpet_setup(char *str)
if (next)
*next++ = 0;
if (!strncmp("disable", str, 7))
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
if (!strncmp("force", str, 5))
- hpet_force_user = 1;
+ hpet_force_user = true;
if (!strncmp("verbose", str, 7))
- hpet_verbose = 1;
+ hpet_verbose = true;
str = next;
}
return 1;
@@ -111,7 +111,7 @@ __setup("hpet=", hpet_setup);
static int __init disable_hpet(char *str)
{
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
return 1;
}
__setup("nohpet", disable_hpet);
@@ -124,7 +124,7 @@ static inline int is_hpet_capable(void)
/*
* HPET timer interrupt enable / disable
*/
-static int hpet_legacy_int_enabled;
+static bool hpet_legacy_int_enabled;
/**
* is_hpet_enabled - check whether the hpet timer interrupt is enabled
@@ -230,7 +230,7 @@ static struct clock_event_device hpet_clockevent;
static void hpet_stop_counter(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ u32 cfg = hpet_readl(HPET_CFG);
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -272,7 +272,7 @@ static void hpet_enable_legacy_int(void)
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
- hpet_legacy_int_enabled = 1;
+ hpet_legacy_int_enabled = true;
}
static void hpet_legacy_clockevent_register(void)
@@ -983,7 +983,7 @@ void hpet_disable(void)
cfg = *hpet_boot_cfg;
else if (hpet_legacy_int_enabled) {
cfg &= ~HPET_CFG_LEGACY;
- hpet_legacy_int_enabled = 0;
+ hpet_legacy_int_enabled = false;
}
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
@@ -1121,8 +1121,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
static void hpet_disable_rtc_channel(void)
{
- unsigned long cfg;
- cfg = hpet_readl(HPET_T1_CFG);
+ u32 cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1b55de1267cf..cd99433b8ba1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -131,11 +131,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
+ if (!*dev)
+ *dev = &x86_dma_fallback_dev;
+
*gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
- if (!*dev)
- *dev = &x86_dma_fallback_dev;
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 39e585a554b7..9f7c21c22477 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -84,6 +84,9 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
memcpy(dst, src, arch_task_struct_size);
+#ifdef CONFIG_VM86
+ dst->thread.vm86 = NULL;
+#endif
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
@@ -550,14 +553,14 @@ unsigned long get_wchan(struct task_struct *p)
if (sp < bottom || sp > top)
return 0;
- fp = READ_ONCE(*(unsigned long *)sp);
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
do {
if (fp < bottom || fp > top)
return 0;
- ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long)));
+ ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
if (!in_sched_functions(ip))
return ip;
- fp = READ_ONCE(*(unsigned long *)fp);
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 176a0f99d4da..cc457ff818ad 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -524,7 +524,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
*/
static void force_disable_hpet_msi(struct pci_dev *unused)
{
- hpet_msi_disable = 1;
+ hpet_msi_disable = true;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3a896109e1df..a1e4da98c8f0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1085,8 +1085,10 @@ void __init setup_arch(char **cmdline_p)
memblock_set_current_limit(ISA_END_ADDRESS);
memblock_x86_fill();
- if (efi_enabled(EFI_BOOT))
+ if (efi_enabled(EFI_BOOT)) {
+ efi_fake_memmap();
efi_find_mirror();
+ }
/*
* The EFI specification says that boot service code won't be called
@@ -1179,6 +1181,14 @@ void __init setup_arch(char **cmdline_p)
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
#endif
tboot_probe();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e0c198e5f920..892ee2e5ecbc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid)
*/
#define UDELAY_10MS_DEFAULT 10000
-static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
+static unsigned int init_udelay = INT_MAX;
static int __init cpu_init_udelay(char *str)
{
@@ -522,13 +522,16 @@ early_param("cpu_init_udelay", cpu_init_udelay);
static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
- if (init_udelay != UDELAY_10MS_DEFAULT)
+ if (init_udelay != INT_MAX)
return;
/* if modern processor, use no delay */
if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
init_udelay = 0;
+
+ /* else, use legacy delay */
+ init_udelay = UDELAY_10MS_DEFAULT;
}
/*
@@ -657,7 +660,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(300);
pr_debug("Startup point 1\n");
@@ -668,7 +673,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(200);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c3f7602cd038..c7c4d9c51e99 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -168,21 +168,20 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
- * into a shift.
+ * into a shift. The larger SC is, the more accurate the conversion, but
+ * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
+ * (64-bit result) can be used.
*
- * We can use khz divisor instead of mhz to keep a better precision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * We can use khz divisor instead of mhz to keep a better precision.
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
static void cyc2ns_data_init(struct cyc2ns_data *data)
{
data->cyc2ns_mul = 0;
- data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+ data->cyc2ns_shift = 0;
data->cyc2ns_offset = 0;
data->__count = 0;
}
@@ -216,14 +215,14 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
if (likely(data == tail)) {
ns = data->cyc2ns_offset;
- ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+ ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
} else {
data->__count++;
barrier();
ns = data->cyc2ns_offset;
- ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+ ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
barrier();
@@ -257,12 +256,22 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
- data->cyc2ns_mul =
- DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
- cpu_khz);
- data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+ clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
+ NSEC_PER_MSEC, 0);
+
+ /*
+ * cyc2ns_shift is exported via arch_perf_update_userpage() where it is
+ * not expected to be greater than 31 due to the original published
+ * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
+ * value) - refer perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (data->cyc2ns_shift == 32) {
+ data->cyc2ns_shift = 31;
+ data->cyc2ns_mul >>= 1;
+ }
+
data->cyc2ns_offset = ns_now -
- mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+ mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
cyc2ns_write_end(cpu, data);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b372a7557c16..9da95b9daf8d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
u64 val, cr0, cr4;
u32 base3;
u16 selector;
- int i;
+ int i, r;
for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
@@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smbase, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return X86EMUL_CONTINUE;
}
static int em_rsm(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06ef4908ba61..6a8bc64566ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4105,17 +4105,13 @@ static void seg_setup(int seg)
static int alloc_apic_access_page(struct kvm *kvm)
{
struct page *page;
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
goto out;
- kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
if (r)
goto out;
@@ -4140,17 +4136,12 @@ static int alloc_identity_pagetable(struct kvm *kvm)
{
/* Called with kvm->slots_lock held. */
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
BUG_ON(kvm->arch.ept_identity_pagetable_done);
- kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr =
- kvm->arch.ept_identity_map_addr;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ kvm->arch.ept_identity_map_addr, PAGE_SIZE);
return r;
}
@@ -4949,14 +4940,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
- struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
- .guest_phys_addr = addr,
- .memory_size = PAGE_SIZE * 3,
- .flags = 0,
- };
- ret = x86_set_memory_region(kvm, &tss_mem);
+ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+ PAGE_SIZE * 3);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92511d4b7236..9a9a19830321 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6453,6 +6453,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
return 1;
}
+static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ !vcpu->arch.apf.halted);
+}
+
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
@@ -6461,8 +6467,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
for (;;) {
- if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
+ if (kvm_vcpu_running(vcpu))
r = vcpu_enter_guest(vcpu);
else
r = vcpu_block(kvm, vcpu);
@@ -7474,34 +7479,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
+ unsigned long hva;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *slot, old;
/* Called with kvm->slots_lock held. */
- BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+ return -EINVAL;
+
+ slot = id_to_memslot(slots, id);
+ if (size) {
+ if (WARN_ON(slot->npages))
+ return -EEXIST;
+
+ /*
+ * MAP_SHARED to prevent internal slot pages from being moved
+ * by fork()/COW.
+ */
+ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *)hva))
+ return PTR_ERR((void *)hva);
+ } else {
+ if (!slot->npages)
+ return 0;
+ hva = 0;
+ }
+
+ old = *slot;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m = *mem;
+ struct kvm_userspace_memory_region m;
- m.slot |= i << 16;
+ m.slot = id | (i << 16);
+ m.flags = 0;
+ m.guest_phys_addr = gpa;
+ m.userspace_addr = hva;
+ m.memory_size = size;
r = __kvm_set_memory_region(kvm, &m);
if (r < 0)
return r;
}
+ if (!size) {
+ r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+ WARN_ON(r < 0);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int r;
mutex_lock(&kvm->slots_lock);
- r = __x86_set_memory_region(kvm, mem);
+ r = __x86_set_memory_region(kvm, id, gpa, size);
mutex_unlock(&kvm->slots_lock);
return r;
@@ -7516,16 +7553,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
* unless the the memory map has changed due to process exit
* or fd copying.
*/
- struct kvm_userspace_memory_region mem;
- memset(&mem, 0, sizeof(mem));
- mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = TSS_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
@@ -7628,27 +7658,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- /*
- * Only private memory slots need to be mapped here since
- * KVM_SET_MEMORY_REGION ioctl is no longer supported.
- */
- if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
- unsigned long userspace_addr;
-
- /*
- * MAP_SHARED to prevent internal slot pages from being moved
- * by fork()/COW.
- */
- userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, 0);
-
- if (IS_ERR((void *)userspace_addr))
- return PTR_ERR((void *)userspace_addr);
-
- memslot->userspace_addr = userspace_addr;
- }
-
return 0;
}
@@ -7710,17 +7719,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int nr_mmu_pages = 0;
- if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
- int ret;
-
- ret = vm_munmap(old->userspace_addr,
- old->npages * PAGE_SIZE);
- if (ret < 0)
- printk(KERN_WARNING
- "kvm_vm_ioctl_set_memory_region: "
- "failed to munmap memory\n");
- }
-
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -7769,19 +7767,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_mmu_invalidate_zap_all_pages(kvm);
}
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+
+ if (kvm_apic_has_events(vcpu))
+ return true;
+
+ if (vcpu->arch.pv.pv_unhalted)
+ return true;
+
+ if (atomic_read(&vcpu->arch.nmi_queued))
+ return true;
+
+ if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ return true;
+
+ if (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_has_interrupt(vcpu))
+ return true;
+
+ return false;
+}
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
kvm_x86_ops->check_nested_events(vcpu, false);
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
- || !list_empty_careful(&vcpu->async_pf.done)
- || kvm_apic_has_events(vcpu)
- || vcpu->arch.pv.pv_unhalted
- || atomic_read(&vcpu->arch.nmi_queued) ||
- (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu));
+ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 816488c0b97e..d388de72eaca 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -353,8 +353,12 @@ AVXcode: 1
17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
18: Grp16 (1A)
19:
-1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
-1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c:
1d:
1e:
@@ -732,6 +736,12 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
# 0x0f 0x38 0xc0-0xff
+c8: sha1nexte Vdq,Wdq
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq
+cb: sha256rnds2 Vdq,Wdq
+cc: sha256msg1 Vdq,Wdq
+cd: sha256msg2 Vdq,Wdq
db: VAESIMC Vdq,Wdq (66),(v1)
dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -790,6 +800,7 @@ AVXcode: 3
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+cc: sha1rnds4 Vdq,Wdq,Ib
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
f0: RORX Gy,Ey,Ib (F2),(v)
EndTable
@@ -874,7 +885,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5:
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -888,6 +899,9 @@ EndTable
GrpTable: Grp9
1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable
@@ -932,8 +946,8 @@ GrpTable: Grp15
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE
5: XRSTOR | lfence (11B)
-6: XSAVEOPT | mfence (11B)
-7: clflush | sfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
EndTable
GrpTable: Grp16
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2c44c0792301..050a092b8d9a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -647,9 +647,12 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
- if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
- PFN_DOWN(__pa(address)) + 1))
- split_page_count(level);
+ if (virt_addr_valid(address)) {
+ unsigned long pfn = PFN_DOWN(__pa(address));
+
+ if (pfn_range_is_mapped(pfn, pfn + 1))
+ split_page_count(level);
+ }
/*
* Install the new, split up pagetable.
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d7f997f7c26d..ea48449b2e63 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -50,11 +50,16 @@ void __init efi_bgrt_init(void)
bgrt_tab->version);
return;
}
- if (bgrt_tab->status != 1) {
- pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+ if (bgrt_tab->status & 0xfe) {
+ pr_err("Ignoring BGRT: reserved status bits are non-zero %u\n",
bgrt_tab->status);
return;
}
+ if (bgrt_tab->status != 1) {
+ pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n",
+ bgrt_tab->status);
+ return;
+ }
if (bgrt_tab->image_type != 0) {
pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
bgrt_tab->image_type);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 6a28ded74211..ad285404ea7f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -194,7 +194,7 @@ static void __init do_add_efi_memmap(void)
int __init efi_memblock_x86_reserve_range(void)
{
struct efi_info *e = &boot_params.efi_info;
- unsigned long pmap;
+ phys_addr_t pmap;
if (efi_enabled(EFI_PARAVIRT))
return 0;
@@ -209,7 +209,7 @@ int __init efi_memblock_x86_reserve_range(void)
#else
pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif
- memmap.phys_map = (void *)pmap;
+ memmap.phys_map = pmap;
memmap.nr_map = e->efi_memmap_size /
e->efi_memdesc_size;
memmap.desc_size = e->efi_memdesc_size;
@@ -222,7 +222,7 @@ int __init efi_memblock_x86_reserve_range(void)
return 0;
}
-static void __init print_efi_memmap(void)
+void __init efi_print_memmap(void)
{
#ifdef EFI_DEBUG
efi_memory_desc_t *md;
@@ -524,7 +524,7 @@ void __init efi_init(void)
return;
if (efi_enabled(EFI_DBG))
- print_efi_memmap();
+ efi_print_memmap();
efi_esrt_init();
}
@@ -1017,24 +1017,6 @@ u32 efi_mem_type(unsigned long phys_addr)
return 0;
}
-u64 efi_mem_attributes(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
- void *p;
-
- if (!efi_enabled(EFI_MEMMAP))
- return 0;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) &&
- (phys_addr < (md->phys_addr +
- (md->num_pages << EFI_PAGE_SHIFT))))
- return md->attribute;
- }
- return 0;
-}
-
static int __init arch_parse_efi_cmdline(char *str)
{
if (!str) {
@@ -1044,8 +1026,6 @@ static int __init arch_parse_efi_cmdline(char *str)
if (parse_option_str(str, "old_map"))
set_bit(EFI_OLD_MEMMAP, &efi.flags);
- if (parse_option_str(str, "debug"))
- set_bit(EFI_DBG, &efi.flags);
return 0;
}
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 9701a4fd7bf2..836a1eb5df43 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -12,7 +12,10 @@
#include <skas.h>
#include <sysdep/tls.h>
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
+static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
+{
+ return syscall(__NR_modify_ldt, func, ptr, bytecount);
+}
static long write_ldt_entry(struct mm_id *mm_idp, int func,
struct user_desc *desc, void **addr, int done)