diff options
Diffstat (limited to 'arch')
44 files changed, 2111 insertions, 685 deletions
diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 8f4486c4415b..eceff9b75b22 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -17,6 +17,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig +KBUILD_DTBS := dtbs # # Select the object file format to substitute into the linker script. @@ -384,7 +385,7 @@ quiet_cmd_64 = OBJCOPY $@ vmlinux.64: vmlinux $(call cmd,64) -all: $(all-y) +all: $(all-y) $(KBUILD_DTBS) # boot $(boot-y): $(vmlinux-32) FORCE diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 3c453a1f1ff1..172801ed35b8 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -78,6 +78,8 @@ OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE $(call if_changed,objcopy) +HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) + # Calculate the load address of the compressed kernel image hostprogs-y := calc_vmlinuz_load_addr diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 240f1d12df75..080b926d2623 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -9,7 +9,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include "../../../../include/linux/sizes.h" +#include <linux/sizes.h> int main(int argc, char *argv[]) { diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart.h b/arch/mips/include/asm/mach-ath79/ar933x_uart.h index b8f8af7dc47c..cacf3545e018 100644 --- a/arch/mips/include/asm/mach-ath79/ar933x_uart.h +++ b/arch/mips/include/asm/mach-ath79/ar933x_uart.h @@ -24,8 +24,8 @@ #define AR933X_UART_CS_PARITY_S 0 #define AR933X_UART_CS_PARITY_M 0x3 #define AR933X_UART_CS_PARITY_NONE 0 -#define AR933X_UART_CS_PARITY_ODD 1 -#define AR933X_UART_CS_PARITY_EVEN 2 +#define 
AR933X_UART_CS_PARITY_ODD 2 +#define AR933X_UART_CS_PARITY_EVEN 3 #define AR933X_UART_CS_IF_MODE_S 2 #define AR933X_UART_CS_IF_MODE_M 0x3 #define AR933X_UART_CS_IF_MODE_NONE 0 diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 50ee7213b432..d79f2b432318 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -203,7 +203,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) bool __virt_addr_valid(const volatile void *kaddr) { - unsigned long vaddr = (unsigned long)vaddr; + unsigned long vaddr = (unsigned long)kaddr; if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) return false; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 65b6e85447b1..144ceb0fba88 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -391,6 +391,7 @@ static struct work_registers build_get_work_registers(u32 **p) static void build_restore_work_registers(u32 **p) { if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); return; } @@ -668,10 +669,12 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -938,10 +941,12 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { uasm_i_nop(p); } @@ -1258,6 +1263,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ if (c0_scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); build_tlb_write_entry(p, l, r, 
tlb_random); uasm_l_leave(l, *p); @@ -1603,15 +1609,17 @@ static void build_setup_pgd(void) uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); uasm_l_tlbl_goaround1(&l, p); UASM_i_SLL(&p, a0, a0, 11); - uasm_i_jr(&p, 31); UASM_i_MTC0(&p, a0, C0_CONTEXT); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } else { /* PGD in c0_KScratch */ - uasm_i_jr(&p, 31); if (cpu_has_ldpte) UASM_i_MTC0(&p, a0, C0_PWBASE); else UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } #else #ifdef CONFIG_SMP @@ -1625,13 +1633,16 @@ static void build_setup_pgd(void) UASM_i_LA_mostly(&p, a2, pgdc); UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); #endif /* SMP */ - uasm_i_jr(&p, 31); /* if pgd_reg is allocated, save PGD also to scratch register */ - if (pgd_reg != -1) + if (pgd_reg != -1) { UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); - else + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + uasm_i_jr(&p, 31); uasm_i_nop(&p); + } #endif if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) panic("tlbmiss_handler_setup_pgd space exceeded"); diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6adce15268bd..8e29c991ba3e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -480,3 +480,16 @@ config CPU_SUP_UMC_32 CPU might render the kernel unbootable. If unsure, say N. + +config CPU_SUP_ZHAOXIN + default y + bool "Support Zhaoxin processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for Zhaoxin processors + + You need this enabled if you want your kernel to run on a + Zhaoxin CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin + CPU might render the kernel unbootable. + + If unsure, say N. 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 11aa3b2afa4d..3b7a0e8d3bc0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1670,11 +1670,17 @@ nmi_restore: iretq END(nmi) +#ifndef CONFIG_IA32_EMULATION +/* + * This handles SYSCALL from 32-bit code. There is no way to program + * MSRs to fully disable 32-bit SYSCALL. + */ ENTRY(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret END(ignore_sysret) +#endif ENTRY(rewind_stack_do_exit) UNWIND_HINT_FUNC diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 9cbfd34042d5..9e07f554333f 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += core.o +obj-y += core.o probe.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3cd94a21bd53..ffc015bd257e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1618,68 +1618,6 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = { .attrs = NULL, }; -/* - * Remove all undefined events (x86_pmu.event_map(id) == 0) - * out of events_attr attributes. - */ -static void __init filter_events(struct attribute **attrs) -{ - struct device_attribute *d; - struct perf_pmu_events_attr *pmu_attr; - int offset = 0; - int i, j; - - for (i = 0; attrs[i]; i++) { - d = (struct device_attribute *)attrs[i]; - pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); - /* str trumps id */ - if (pmu_attr->event_str) - continue; - if (x86_pmu.event_map(i + offset)) - continue; - - for (j = i; attrs[j]; j++) - attrs[j] = attrs[j + 1]; - - /* Check the shifted attr. */ - i--; - - /* - * event_map() is index based, the attrs array is organized - * by increasing event index. 
If we shift the events, then - * we need to compensate for the event_map(), otherwise - * we are looking up the wrong event in the map - */ - offset++; - } -} - -/* Merge two pointer arrays */ -__init struct attribute **merge_attr(struct attribute **a, struct attribute **b) -{ - struct attribute **new; - int j, i; - - for (j = 0; a && a[j]; j++) - ; - for (i = 0; b && b[i]; i++) - j++; - j++; - - new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); - if (!new) - return NULL; - - j = 0; - for (i = 0; a && a[i]; i++) - new[j++] = a[i]; - for (i = 0; b && b[i]; i++) - new[j++] = b[i]; - new[j] = NULL; - - return new; -} - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ @@ -1744,9 +1682,24 @@ static struct attribute *events_attr[] = { NULL, }; +/* + * Remove all undefined events (x86_pmu.event_map(id) == 0) + * out of events_attr attributes. + */ +static umode_t +is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + /* str trumps id */ + return pmu_attr->event_str || x86_pmu.event_map(idx) ? 
attr->mode : 0; +} + static struct attribute_group x86_pmu_events_group __ro_after_init = { .name = "events", .attrs = events_attr, + .is_visible = is_visible, }; ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) @@ -1842,37 +1795,10 @@ static int __init init_hw_perf_events(void) x86_pmu_format_group.attrs = x86_pmu.format_attrs; - if (x86_pmu.caps_attrs) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); - if (!WARN_ON(!tmp)) - x86_pmu_caps_group.attrs = tmp; - } - - if (x86_pmu.event_attrs) - x86_pmu_events_group.attrs = x86_pmu.event_attrs; - if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; - else - filter_events(x86_pmu_events_group.attrs); - if (x86_pmu.cpu_events) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); - if (!WARN_ON(!tmp)) - x86_pmu_events_group.attrs = tmp; - } - - if (x86_pmu.attrs) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); - if (!WARN_ON(!tmp)) - x86_pmu_attr_group.attrs = tmp; - } + pmu.attr_update = x86_pmu.attr_update; pr_info("... version: %d\n", x86_pmu.version); pr_info("... 
bit width: %d\n", x86_pmu.cntval_bits); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a5436cee20b1..bda450ff51ee 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -20,6 +20,7 @@ #include <asm/intel-family.h> #include <asm/apic.h> #include <asm/cpu_device_id.h> +#include <asm/hypervisor.h> #include "../perf_event.h" @@ -3897,8 +3898,6 @@ static __initconst const struct x86_pmu core_pmu = { .check_period = intel_pmu_check_period, }; -static struct attribute *intel_pmu_attrs[]; - static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -3930,8 +3929,6 @@ static __initconst const struct x86_pmu intel_pmu = { .format_attrs = intel_arch3_formats_attr, .events_sysfs_show = intel_event_sysfs_show, - .attrs = intel_pmu_attrs, - .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, @@ -4055,6 +4052,13 @@ static bool check_msr(unsigned long msr, u64 mask) u64 val_old, val_new, val_tmp; /* + * Disable the check for real HW, so we don't + * mess with potentionaly enabled registers: + */ + if (hypervisor_is_type(X86_HYPER_NATIVE)) + return true; + + /* * Read the current value, change it and read it back to see if it * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. @@ -4274,13 +4278,6 @@ static struct attribute *icl_tsx_events_attrs[] = { NULL, }; -static __init struct attribute **get_icl_events_attrs(void) -{ - return boot_cpu_has(X86_FEATURE_RTM) ? 
- merge_attr(icl_events_attrs, icl_tsx_events_attrs) : - icl_events_attrs; -} - static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4402,43 +4399,111 @@ static DEVICE_ATTR(allow_tsx_force_abort, 0644, static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, - NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */ + &dev_attr_allow_tsx_force_abort.attr, NULL, }; -static __init struct attribute ** -get_events_attrs(struct attribute **base, - struct attribute **mem, - struct attribute **tsx) +static umode_t +tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - struct attribute **attrs = base; - struct attribute **old; + return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; +} - if (mem && x86_pmu.pebs) - attrs = merge_attr(attrs, mem); +static umode_t +pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.pebs ? attr->mode : 0; +} - if (tsx && boot_cpu_has(X86_FEATURE_RTM)) { - old = attrs; - attrs = merge_attr(attrs, tsx); - if (old != base) - kfree(old); - } +static umode_t +lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.lbr_nr ? attr->mode : 0; +} - return attrs; +static umode_t +exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.version >= 2 ? attr->mode : 0; } +static umode_t +default_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + if (attr == &dev_attr_allow_tsx_force_abort.attr) + return x86_pmu.flags & PMU_FL_TFA ? 
attr->mode : 0; + + return attr->mode; +} + +static struct attribute_group group_events_td = { + .name = "events", +}; + +static struct attribute_group group_events_mem = { + .name = "events", + .is_visible = pebs_is_visible, +}; + +static struct attribute_group group_events_tsx = { + .name = "events", + .is_visible = tsx_is_visible, +}; + +static struct attribute_group group_caps_gen = { + .name = "caps", + .attrs = intel_pmu_caps_attrs, +}; + +static struct attribute_group group_caps_lbr = { + .name = "caps", + .attrs = lbr_attrs, + .is_visible = lbr_is_visible, +}; + +static struct attribute_group group_format_extra = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_format_extra_skl = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_default = { + .attrs = intel_pmu_attrs, + .is_visible = default_is_visible, +}; + +static const struct attribute_group *attr_update[] = { + &group_events_td, + &group_events_mem, + &group_events_tsx, + &group_caps_gen, + &group_caps_lbr, + &group_format_extra, + &group_format_extra_skl, + &group_default, + NULL, +}; + +static struct attribute *empty_attrs; + __init int intel_pmu_init(void) { - struct attribute **extra_attr = NULL; - struct attribute **mem_attr = NULL; - struct attribute **tsx_attr = NULL; - struct attribute **to_free = NULL; + struct attribute **extra_skl_attr = &empty_attrs; + struct attribute **extra_attr = &empty_attrs; + struct attribute **td_attr = &empty_attrs; + struct attribute **mem_attr = &empty_attrs; + struct attribute **tsx_attr = &empty_attrs; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; struct extra_reg *er; + bool pmem = false; int version, i; char *name; @@ -4596,7 +4661,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= 
PMU_FL_HAS_RSP_1; - x86_pmu.cpu_events = slm_events_attrs; + td_attr = slm_events_attrs; extra_attr = slm_format_attr; pr_cont("Silvermont events, "); name = "silvermont"; @@ -4624,7 +4689,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.cpu_events = glm_events_attrs; + td_attr = glm_events_attrs; extra_attr = slm_format_attr; pr_cont("Goldmont events, "); name = "goldmont"; @@ -4651,7 +4716,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.get_event_constraints = glp_get_event_constraints; - x86_pmu.cpu_events = glm_events_attrs; + td_attr = glm_events_attrs; /* Goldmont Plus has 4-wide pipeline */ event_attr_td_total_slots_scale_glm.event_str = "4"; extra_attr = slm_format_attr; @@ -4740,7 +4805,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.cpu_events = snb_events_attrs; + td_attr = snb_events_attrs; mem_attr = snb_mem_events_attrs; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ @@ -4781,7 +4846,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.cpu_events = snb_events_attrs; + td_attr = snb_events_attrs; mem_attr = snb_mem_events_attrs; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ @@ -4818,10 +4883,10 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; pr_cont("Haswell events, "); @@ -4860,10 +4925,10 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.limit_period = bdw_limit_period; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; pr_cont("Broadwell events, "); @@ -4890,9 +4955,10 @@ __init int intel_pmu_init(void) name = "knights-landing"; break; + case INTEL_FAM6_SKYLAKE_X: + pmem = true; case INTEL_FAM6_SKYLAKE_MOBILE: case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: x86_add_quirk(intel_pebs_isolation_quirk); @@ -4920,27 +4986,28 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = hsw_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
hsw_format_attr : nhm_format_attr; - extra_attr = merge_attr(extra_attr, skl_format_attr); - to_free = extra_attr; - x86_pmu.cpu_events = hsw_events_attrs; + extra_skl_attr = skl_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; - intel_pmu_pebs_data_source_skl( - boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); + intel_pmu_pebs_data_source_skl(pmem); if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { x86_pmu.flags |= PMU_FL_TFA; x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; } pr_cont("Skylake events, "); name = "skylake"; break; + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_XEON_D: + pmem = true; case INTEL_FAM6_ICELAKE_MOBILE: + case INTEL_FAM6_ICELAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -4959,11 +5026,12 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = icl_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
hsw_format_attr : nhm_format_attr; - extra_attr = merge_attr(extra_attr, skl_format_attr); - x86_pmu.cpu_events = get_icl_events_attrs(); + extra_skl_attr = skl_format_attr; + mem_attr = icl_events_attrs; + tsx_attr = icl_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(false); + intel_pmu_pebs_data_source_skl(pmem); pr_cont("Icelake events, "); name = "icelake"; break; @@ -4988,14 +5056,14 @@ __init int intel_pmu_init(void) snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); - if (version >= 2 && extra_attr) { - x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, - extra_attr); - WARN_ON(!x86_pmu.format_attrs); - } - x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events, - mem_attr, tsx_attr); + group_events_td.attrs = td_attr; + group_events_mem.attrs = mem_attr; + group_events_tsx.attrs = tsx_attr; + group_format_extra.attrs = extra_attr; + group_format_extra_skl.attrs = extra_skl_attr; + + x86_pmu.attr_update = attr_update; if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", @@ -5043,12 +5111,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } - x86_pmu.caps_attrs = intel_pmu_caps_attrs; - - if (x86_pmu.lbr_nr) { - x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs); + if (x86_pmu.lbr_nr) pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); - } /* * Access extra MSR may cause #GP under certain circumstances. 
@@ -5078,7 +5142,6 @@ __init int intel_pmu_init(void) if (x86_pmu.counter_freezing) x86_pmu.handle_irq = intel_pmu_handle_irq_v4; - kfree(to_free); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 6072f92cb8ea..688592b34564 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -96,6 +96,7 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" +#include "../probe.h" MODULE_LICENSE("GPL"); @@ -144,25 +145,42 @@ enum perf_cstate_core_events { PERF_CSTATE_CORE_EVENT_MAX, }; -PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); -PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); -PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); -PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03"); -static struct perf_cstate_msr core_msr[] = { - [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 }, - [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 }, - [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 }, - [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 }, +static unsigned long core_msr_mask; + +PMU_EVENT_GROUP(events, cstate_core_c1); +PMU_EVENT_GROUP(events, cstate_core_c3); +PMU_EVENT_GROUP(events, cstate_core_c6); +PMU_EVENT_GROUP(events, cstate_core_c7); + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr core_msr[] = { + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr }, + 
[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr }, }; -static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { +static struct attribute *attrs_empty[] = { NULL, }; +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ static struct attribute_group core_events_attr_group = { .name = "events", - .attrs = core_events_attrs, + .attrs = attrs_empty, }; DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); @@ -211,31 +229,37 @@ enum perf_cstate_pkg_events { PERF_CSTATE_PKG_EVENT_MAX, }; -PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); -PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); -PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); -PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03"); -PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04"); -PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); -PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); - -static struct perf_cstate_msr pkg_msr[] = { - [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 }, - [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 }, - [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 }, - [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 }, - [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 }, - [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 }, - [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 }, -}; - -static struct attribute 
*pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { - NULL, +PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04"); +PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05"); +PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06"); + +static unsigned long pkg_msr_mask; + +PMU_EVENT_GROUP(events, cstate_pkg_c2); +PMU_EVENT_GROUP(events, cstate_pkg_c3); +PMU_EVENT_GROUP(events, cstate_pkg_c6); +PMU_EVENT_GROUP(events, cstate_pkg_c7); +PMU_EVENT_GROUP(events, cstate_pkg_c8); +PMU_EVENT_GROUP(events, cstate_pkg_c9); +PMU_EVENT_GROUP(events, cstate_pkg_c10); + +static struct perf_msr pkg_msr[] = { + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, }; static struct attribute_group pkg_events_attr_group = { .name = "events", - .attrs = pkg_events_attrs, + .attrs = attrs_empty, }; DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); @@ -289,7 +313,8 @@ static int cstate_pmu_event_init(struct perf_event *event) if (event->pmu == &cstate_core_pmu) { if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) return -EINVAL; - if (!core_msr[cfg].attr) + cfg = array_index_nospec((unsigned long)cfg, 
PERF_CSTATE_CORE_EVENT_MAX); + if (!(core_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; cpu = cpumask_any_and(&cstate_core_cpu_mask, @@ -298,11 +323,11 @@ static int cstate_pmu_event_init(struct perf_event *event) if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); - if (!pkg_msr[cfg].attr) + if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; cpu = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_core_cpumask(event->cpu)); + topology_die_cpumask(event->cpu)); } else { return -ENOENT; } @@ -385,7 +410,7 @@ static int cstate_cpu_exit(unsigned int cpu) if (has_cstate_pkg && cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); /* Migrate events if there is a valid target */ if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_pkg_cpu_mask); @@ -414,15 +439,35 @@ static int cstate_cpu_init(unsigned int cpu) * in the package cpu mask as the designated reader. 
*/ target = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_core_cpumask(cpu)); + topology_die_cpumask(cpu)); if (has_cstate_pkg && target >= nr_cpu_ids) cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); return 0; } +const struct attribute_group *core_attr_update[] = { + &group_cstate_core_c1, + &group_cstate_core_c3, + &group_cstate_core_c6, + &group_cstate_core_c7, + NULL, +}; + +const struct attribute_group *pkg_attr_update[] = { + &group_cstate_pkg_c2, + &group_cstate_pkg_c3, + &group_cstate_pkg_c6, + &group_cstate_pkg_c7, + &group_cstate_pkg_c8, + &group_cstate_pkg_c9, + &group_cstate_pkg_c10, + NULL, +}; + static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, + .attr_update = core_attr_update, .name = "cstate_core", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, @@ -437,6 +482,7 @@ static struct pmu cstate_core_pmu = { static struct pmu cstate_pkg_pmu = { .attr_groups = pkg_attr_groups, + .attr_update = pkg_attr_update, .name = "cstate_pkg", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, @@ -580,35 +626,11 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -/* - * Probe the cstate events and insert the available one into sysfs attrs - * Return false if there are no available events. 
- */ -static bool __init cstate_probe_msr(const unsigned long evmsk, int max, - struct perf_cstate_msr *msr, - struct attribute **attrs) -{ - bool found = false; - unsigned int bit; - u64 val; - - for (bit = 0; bit < max; bit++) { - if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) { - *attrs++ = &msr[bit].attr->attr.attr; - found = true; - } else { - msr[bit].attr = NULL; - } - } - *attrs = NULL; - - return found; -} - static int __init cstate_probe(const struct cstate_model *cm) { /* SLM has different MSR for PKG C6 */ @@ -620,13 +642,14 @@ static int __init cstate_probe(const struct cstate_model *cm) pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; - has_cstate_core = cstate_probe_msr(cm->core_events, - PERF_CSTATE_CORE_EVENT_MAX, - core_msr, core_events_attrs); + core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX, + true, (void *) &cm->core_events); - has_cstate_pkg = cstate_probe_msr(cm->pkg_events, - PERF_CSTATE_PKG_EVENT_MAX, - pkg_msr, pkg_events_attrs); + pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX, + true, (void *) &cm->pkg_events); + + has_cstate_core = !!core_msr_mask; + has_cstate_pkg = !!pkg_msr_mask; return (has_cstate_core || has_cstate_pkg) ? 
0 : -ENODEV; } @@ -663,7 +686,13 @@ static int __init cstate_init(void) } if (has_cstate_pkg) { - err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1); + if (topology_max_die_per_package() > 1) { + err = perf_pmu_register(&cstate_pkg_pmu, + "cstate_die", -1); + } else { + err = perf_pmu_register(&cstate_pkg_pmu, + cstate_pkg_pmu.name, -1); + } if (err) { has_cstate_pkg = false; pr_info("Failed to register cstate pkg pmu\n"); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 26c03f5adfb9..64ab51ffdf06 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,27 +55,28 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" +#include "../probe.h" MODULE_LICENSE("GPL"); /* * RAPL energy status counters */ -#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ -#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ -#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ -#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ -#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ -#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ -#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ -#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ -#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */ -#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */ - -#define NR_RAPL_DOMAINS 0x5 +enum perf_rapl_events { + PERF_RAPL_PP0 = 0, /* all cores */ + PERF_RAPL_PKG, /* entire package */ + PERF_RAPL_RAM, /* DRAM */ + PERF_RAPL_PP1, /* gpu */ + PERF_RAPL_PSYS, /* psys */ + + PERF_RAPL_MAX, + NR_RAPL_DOMAINS = PERF_RAPL_MAX, +}; + static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "pp0-core", "package", @@ -84,33 +85,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "psys", }; -/* Clients have PP0, PKG */ -#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 
1<<RAPL_IDX_PP1_NRG_STAT) - -/* Servers have PP0, PKG, RAM */ -#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT) - -/* Servers have PP0, PKG, RAM, PP1 */ -#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT) - -/* SKL clients have PP0, PKG, RAM, PP1, PSYS */ -#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT|\ - 1<<RAPL_IDX_PSYS_NRG_STAT) - -/* Knights Landing has PKG, RAM */ -#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT) - /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved @@ -149,26 +123,32 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; - unsigned int maxpkg; + unsigned int maxdie; struct rapl_pmu *pmus[]; }; +struct rapl_model { + unsigned long events; + bool apply_quirk; +}; + /* 1/2^hw_unit Joule */ static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; static unsigned int rapl_cntr_mask; static u64 rapl_timer_ms; +static struct perf_msr rapl_msrs[]; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - unsigned int pkgid = topology_logical_package_id(cpu); + unsigned int dieid = topology_logical_die_id(cpu); /* * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map. */ - return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL; + return dieid < rapl_pmus->maxdie ? 
rapl_pmus->pmus[dieid] : NULL; } static inline u64 rapl_read_counter(struct perf_event *event) @@ -350,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, msr, ret = 0; + int bit, ret = 0; struct rapl_pmu *pmu; /* only look at RAPL events */ @@ -366,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event) event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - /* - * check event is known (determines counter) - */ - switch (cfg) { - case INTEL_RAPL_PP0: - bit = RAPL_IDX_PP0_NRG_STAT; - msr = MSR_PP0_ENERGY_STATUS; - break; - case INTEL_RAPL_PKG: - bit = RAPL_IDX_PKG_NRG_STAT; - msr = MSR_PKG_ENERGY_STATUS; - break; - case INTEL_RAPL_RAM: - bit = RAPL_IDX_RAM_NRG_STAT; - msr = MSR_DRAM_ENERGY_STATUS; - break; - case INTEL_RAPL_PP1: - bit = RAPL_IDX_PP1_NRG_STAT; - msr = MSR_PP1_ENERGY_STATUS; - break; - case INTEL_RAPL_PSYS: - bit = RAPL_IDX_PSYS_NRG_STAT; - msr = MSR_PLATFORM_ENERGY_STATUS; - break; - default: + if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) return -EINVAL; - } + + cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); + bit = cfg - 1; + /* check event supported */ if (!(rapl_cntr_mask & (1 << bit))) return -EINVAL; @@ -407,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; - event->hw.event_base = msr; + event->hw.event_base = rapl_msrs[bit].msr; event->hw.config = cfg; event->hw.idx = bit; @@ -457,110 +416,111 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); -static struct attribute *rapl_events_srv_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_ram), +/* + * There are no default 
events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ +static struct attribute *attrs_empty[] = { + NULL, +}; - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_ram_unit), +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = attrs_empty, +}; - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_ram_scale), +DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, NULL, }; -static struct attribute *rapl_events_cln_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), +static const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, NULL, }; -static struct attribute *rapl_events_hsw_attr[] = { +static struct attribute *rapl_events_cores[] = { EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), - EVENT_PTR(rapl_ram_scale), NULL, }; -static struct attribute *rapl_events_skl_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_psys), +static struct attribute_group rapl_events_cores_group = { + .name = "events", + .attrs = rapl_events_cores, +}; - EVENT_PTR(rapl_cores_unit), +static struct attribute *rapl_events_pkg[] = { + EVENT_PTR(rapl_pkg), EVENT_PTR(rapl_pkg_unit), 
- EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_psys_unit), - - EVENT_PTR(rapl_cores_scale), EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), - EVENT_PTR(rapl_ram_scale), - EVENT_PTR(rapl_psys_scale), NULL, }; -static struct attribute *rapl_events_knl_attr[] = { - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_ram), +static struct attribute_group rapl_events_pkg_group = { + .name = "events", + .attrs = rapl_events_pkg, +}; - EVENT_PTR(rapl_pkg_unit), +static struct attribute *rapl_events_ram[] = { + EVENT_PTR(rapl_ram), EVENT_PTR(rapl_ram_unit), - - EVENT_PTR(rapl_pkg_scale), EVENT_PTR(rapl_ram_scale), NULL, }; -static struct attribute_group rapl_pmu_events_group = { - .name = "events", - .attrs = NULL, /* patched at runtime */ +static struct attribute_group rapl_events_ram_group = { + .name = "events", + .attrs = rapl_events_ram, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); -static struct attribute *rapl_formats_attr[] = { - &format_attr_event.attr, +static struct attribute *rapl_events_gpu[] = { + EVENT_PTR(rapl_gpu), + EVENT_PTR(rapl_gpu_unit), + EVENT_PTR(rapl_gpu_scale), NULL, }; -static struct attribute_group rapl_pmu_format_group = { - .name = "format", - .attrs = rapl_formats_attr, +static struct attribute_group rapl_events_gpu_group = { + .name = "events", + .attrs = rapl_events_gpu, }; -static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, - &rapl_pmu_format_group, - &rapl_pmu_events_group, +static struct attribute *rapl_events_psys[] = { + EVENT_PTR(rapl_psys), + EVENT_PTR(rapl_psys_unit), + EVENT_PTR(rapl_psys_scale), NULL, }; +static struct attribute_group rapl_events_psys_group = { + .name = "events", + .attrs = rapl_events_psys, +}; + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr rapl_msrs[] = { + [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, + [PERF_RAPL_PKG] = { 
MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, + [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, + [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, +}; + static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); @@ -572,7 +532,7 @@ static int rapl_cpu_offline(unsigned int cpu) pmu->cpu = -1; /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); /* Migrate rapl events to the new target */ if (target < nr_cpu_ids) { @@ -599,14 +559,14 @@ static int rapl_cpu_online(unsigned int cpu) pmu->timer_interval = ms_to_ktime(rapl_timer_ms); rapl_hrtimer_init(pmu); - rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; + rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; } /* * Check if there is an online cpu in the package which collects rapl * events already. */ - target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); + target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; @@ -633,7 +593,7 @@ static int rapl_check_hw_unit(bool apply_quirk) * of 2. 
Datasheet, September 2014, Reference Number: 330784-001 " */ if (apply_quirk) - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + rapl_hw_unit[PERF_RAPL_RAM] = 16; /* * Calculate the timer rate: @@ -669,23 +629,33 @@ static void cleanup_rapl_pmus(void) { int i; - for (i = 0; i < rapl_pmus->maxpkg; i++) + for (i = 0; i < rapl_pmus->maxdie; i++) kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } +const struct attribute_group *rapl_attr_update[] = { /* one "events" group per perf_rapl_events domain */ + &rapl_events_cores_group, + &rapl_events_pkg_group, + &rapl_events_ram_group, + &rapl_events_gpu_group, + &rapl_events_psys_group, + NULL, +}; + static int __init init_rapl_pmus(void) { - int maxpkg = topology_max_packages(); + int maxdie = topology_max_packages() * topology_max_die_per_package(); size_t size; - size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *); + size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); rapl_pmus = kzalloc(size, GFP_KERNEL); if (!rapl_pmus) return -ENOMEM; - rapl_pmus->maxpkg = maxpkg; + rapl_pmus->maxdie = maxdie; rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.attr_update = rapl_attr_update; rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; rapl_pmus->pmu.event_init = rapl_pmu_event_init; rapl_pmus->pmu.add = rapl_pmu_event_add; @@ -701,105 +671,96 @@ static int __init init_rapl_pmus(void) #define X86_RAPL_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } -struct intel_rapl_init_fun { - bool apply_quirk; - int cntr_mask; - struct attribute **attrs; -}; - -static const struct intel_rapl_init_fun snb_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_CLN, - .attrs = rapl_events_cln_attr, +static struct rapl_model model_snb = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_PP1), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun hsx_rapl_init __initconst = { - .apply_quirk = true, - .cntr_mask = RAPL_IDX_SRV, - .attrs = rapl_events_srv_attr, +static 
struct rapl_model model_snbep = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun hsw_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_HSW, - .attrs = rapl_events_hsw_attr, +static struct rapl_model model_hsw = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun snbep_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_SRV, - .attrs = rapl_events_srv_attr, +static struct rapl_model model_hsx = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = true, }; -static const struct intel_rapl_init_fun knl_rapl_init __initconst = { - .apply_quirk = true, - .cntr_mask = RAPL_IDX_KNL, - .attrs = rapl_events_knl_attr, +static struct rapl_model model_knl = { + .events = BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = true, }; -static const struct intel_rapl_init_fun skl_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_SKL_CLN, - .attrs = rapl_events_skl_attr, +static struct rapl_model model_skl = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1) | + BIT(PERF_RAPL_PSYS), + .apply_quirk = false, }; -static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - - 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), +static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), + 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl), {}, }; -MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match); +MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; - struct intel_rapl_init_fun *rapl_init; - bool apply_quirk; + struct rapl_model *rm; int ret; - id = x86_match_cpu(rapl_cpu_match); + id = x86_match_cpu(rapl_model_match); if (!id) return -ENODEV; - rapl_init = (struct intel_rapl_init_fun *)id->driver_data; - apply_quirk = rapl_init->apply_quirk; - rapl_cntr_mask = rapl_init->cntr_mask; - rapl_pmu_events_group.attrs = rapl_init->attrs; + rm = (struct rapl_model *) id->driver_data; + rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, + false, (void *) &rm->events); - ret = rapl_check_hw_unit(apply_quirk); + ret = rapl_check_hw_unit(rm->apply_quirk); if (ret) return ret; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9e3fbd47cb56..3694a5d0703d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -8,6 +8,7 @@ static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; struct intel_uncore_type **uncore_pci_uncores = empty_uncore; +struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver 
*uncore_pci_driver; @@ -15,7 +16,7 @@ struct pci_driver *uncore_pci_driver; DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_extra_dev *uncore_extra_pci_dev; -static int max_packages; +static int max_dies; /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -28,7 +29,7 @@ struct event_constraint uncore_constraint_empty = MODULE_LICENSE("GPL"); -static int uncore_pcibus_to_physid(struct pci_bus *bus) +int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; int phys_id = -1; @@ -101,13 +102,13 @@ ssize_t uncore_event_show(struct kobject *kobj, struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - unsigned int pkgid = topology_logical_package_id(cpu); + unsigned int dieid = topology_logical_die_id(cpu); /* * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map. */ - return pkgid < max_packages ? pmu->boxes[pkgid] : NULL; + return dieid < max_dies ? pmu->boxes[dieid] : NULL; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -119,6 +120,21 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve return count; } +void uncore_mmio_exit_box(struct intel_uncore_box *box) +{ + if (box->io_addr) + iounmap(box->io_addr); +} + +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (!box->io_addr) + return 0; + + return readq(box->io_addr + event->hw.event_base); +} + /* * generic get constraint function for shared match/mask registers. 
*/ @@ -312,7 +328,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, uncore_pmu_init_hrtimer(box); box->cpu = -1; box->pci_phys_id = -1; - box->pkgid = -1; + box->dieid = -1; /* set default hrtimer timeout */ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; @@ -827,10 +843,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { - int pkg; + int die; - for (pkg = 0; pkg < max_packages; pkg++) - kfree(pmu->boxes[pkg]); + for (die = 0; die < max_dies; die++) + kfree(pmu->boxes[die]); kfree(pmu->boxes); } @@ -867,7 +883,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) if (!pmus) return -ENOMEM; - size = max_packages * sizeof(struct intel_uncore_box *); + size = max_dies * sizeof(struct intel_uncore_box *); for (i = 0; i < type->num_boxes; i++) { pmus[i].func_id = setid ? i : -1; @@ -937,20 +953,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id struct intel_uncore_type *type; struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; - int phys_id, pkg, ret; + int phys_id, die, ret; phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; - pkg = topology_phys_to_logical_pkg(phys_id); - if (pkg < 0) + die = (topology_max_die_per_package() > 1) ? 
phys_id : + topology_phys_to_logical_pkg(phys_id); + if (die < 0) return -EINVAL; if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { int idx = UNCORE_PCI_DEV_IDX(id->driver_data); - uncore_extra_pci_dev[pkg].dev[idx] = pdev; + uncore_extra_pci_dev[die].dev[idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } @@ -989,7 +1006,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } - if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) + if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) return -EINVAL; box = uncore_alloc_box(type, NUMA_NO_NODE); @@ -1003,13 +1020,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id atomic_inc(&box->refcnt); box->pci_phys_id = phys_id; - box->pkgid = pkg; + box->dieid = die; box->pci_dev = pdev; box->pmu = pmu; uncore_box_init(box); pci_set_drvdata(pdev, box); - pmu->boxes[pkg] = box; + pmu->boxes[die] = box; if (atomic_inc_return(&pmu->activeboxes) > 1) return 0; @@ -1017,7 +1034,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ret = uncore_pmu_register(pmu); if (ret) { pci_set_drvdata(pdev, NULL); - pmu->boxes[pkg] = NULL; + pmu->boxes[die] = NULL; uncore_box_exit(box); kfree(box); } @@ -1028,16 +1045,17 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; - int i, phys_id, pkg; + int i, phys_id, die; phys_id = uncore_pcibus_to_physid(pdev->bus); box = pci_get_drvdata(pdev); if (!box) { - pkg = topology_phys_to_logical_pkg(phys_id); + die = (topology_max_die_per_package() > 1) ? 
phys_id : + topology_phys_to_logical_pkg(phys_id); for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { - uncore_extra_pci_dev[pkg].dev[i] = NULL; + if (uncore_extra_pci_dev[die].dev[i] == pdev) { + uncore_extra_pci_dev[die].dev[i] = NULL; break; } } @@ -1050,7 +1068,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) return; pci_set_drvdata(pdev, NULL); - pmu->boxes[box->pkgid] = NULL; + pmu->boxes[box->dieid] = NULL; if (atomic_dec_return(&pmu->activeboxes) == 0) uncore_pmu_unregister(pmu); uncore_box_exit(box); @@ -1062,7 +1080,7 @@ static int __init uncore_pci_init(void) size_t size; int ret; - size = max_packages * sizeof(struct pci_extra_dev); + size = max_dies * sizeof(struct pci_extra_dev); uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); if (!uncore_extra_pci_dev) { ret = -ENOMEM; @@ -1109,11 +1127,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, { struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; - int i, pkg; + int i, die; - pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu); + die = topology_logical_die_id(old_cpu < 0 ? 
new_cpu : old_cpu); for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; + box = pmu->boxes[die]; if (!box) continue; @@ -1141,18 +1159,33 @@ static void uncore_change_context(struct intel_uncore_type **uncores, uncore_change_type_ctx(*uncores, old_cpu, new_cpu); } -static int uncore_event_cpu_offline(unsigned int cpu) +static void uncore_box_unref(struct intel_uncore_type **types, int id) { - struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, pkg, target; + int i; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[id]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } +} + +static int uncore_event_cpu_offline(unsigned int cpu) +{ + int die, target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) goto unref; /* Find a new cpu to collect uncore events */ - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); /* Migrate uncore events to the new target */ if (target < nr_cpu_ids) @@ -1161,25 +1194,19 @@ static int uncore_event_cpu_offline(unsigned int cpu) target = -1; uncore_change_context(uncore_msr_uncores, cpu, target); + uncore_change_context(uncore_mmio_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); unref: /* Clear the references */ - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box && atomic_dec_return(&box->refcnt) == 0) - uncore_box_exit(box); - } - } + die = topology_logical_die_id(cpu); + uncore_box_unref(uncore_msr_uncores, die); + uncore_box_unref(uncore_mmio_uncores, die); return 0; } static int 
allocate_boxes(struct intel_uncore_type **types, - unsigned int pkg, unsigned int cpu) + unsigned int die, unsigned int cpu) { struct intel_uncore_box *box, *tmp; struct intel_uncore_type *type; @@ -1192,20 +1219,20 @@ static int allocate_boxes(struct intel_uncore_type **types, type = *types; pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { - if (pmu->boxes[pkg]) + if (pmu->boxes[die]) continue; box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) goto cleanup; box->pmu = pmu; - box->pkgid = pkg; + box->dieid = die; list_add(&box->active_list, &allocated); } } /* Install them in the pmus */ list_for_each_entry_safe(box, tmp, &allocated, active_list) { list_del_init(&box->active_list); - box->pmu->boxes[pkg] = box; + box->pmu->boxes[die] = box; } return 0; @@ -1217,15 +1244,15 @@ cleanup: return -ENOMEM; } -static int uncore_event_cpu_online(unsigned int cpu) +static int uncore_box_ref(struct intel_uncore_type **types, + int id, unsigned int cpu) { - struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, ret, pkg, target; + int i, ret; - pkg = topology_logical_package_id(cpu); - ret = allocate_boxes(types, pkg, cpu); + ret = allocate_boxes(types, id, cpu); if (ret) return ret; @@ -1233,23 +1260,38 @@ static int uncore_event_cpu_online(unsigned int cpu) type = *types; pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; + box = pmu->boxes[id]; if (box && atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } + return 0; +} + +static int uncore_event_cpu_online(unsigned int cpu) +{ + int die, target, msr_ret, mmio_ret; + + die = topology_logical_die_id(cpu); + msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); + mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); + if (msr_ret && mmio_ret) + return -ENOMEM; /* * Check if there is an online cpu in the package * which collects uncore 
events already. */ - target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); + target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; cpumask_set_cpu(cpu, &uncore_cpu_mask); - uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!msr_ret) + uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!mmio_ret) + uncore_change_context(uncore_mmio_uncores, -1, cpu); uncore_change_context(uncore_pci_uncores, -1, cpu); return 0; } @@ -1297,12 +1339,35 @@ err: return ret; } +static int __init uncore_mmio_init(void) +{ + struct intel_uncore_type **types = uncore_mmio_uncores; + int ret; + + ret = uncore_types_init(types, true); + if (ret) + goto err; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + goto err; + } + return 0; +err: + uncore_types_exit(uncore_mmio_uncores); + uncore_mmio_uncores = empty_uncore; + return ret; +} + + #define X86_UNCORE_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); + void (*mmio_init)(void); }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1373,6 +1438,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun snr_uncore_init __initconst = { + .cpu_init = snr_uncore_cpu_init, + .pci_init = snr_uncore_pci_init, + .mmio_init = snr_uncore_mmio_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1400,6 +1471,9 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), 
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init), {}, }; @@ -1409,7 +1483,7 @@ static int __init intel_uncore_init(void) { const struct x86_cpu_id *id; struct intel_uncore_init_fun *uncore_init; - int pret = 0, cret = 0, ret; + int pret = 0, cret = 0, mret = 0, ret; id = x86_match_cpu(intel_uncore_match); if (!id) @@ -1418,7 +1492,7 @@ static int __init intel_uncore_init(void) if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; - max_packages = topology_max_packages(); + max_dies = topology_max_packages() * topology_max_die_per_package(); uncore_init = (struct intel_uncore_init_fun *)id->driver_data; if (uncore_init->pci_init) { @@ -1432,7 +1506,12 @@ static int __init intel_uncore_init(void) cret = uncore_cpu_init(); } - if (cret && pret) + if (uncore_init->mmio_init) { + uncore_init->mmio_init(); + mret = uncore_mmio_init(); + } + + if (cret && pret && mret) return -ENODEV; /* Install hotplug callbacks to setup the targets for each package */ @@ -1446,6 +1525,7 @@ static int __init intel_uncore_init(void) err: uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); return ret; } @@ -1455,6 +1535,7 @@ static void __exit intel_uncore_exit(void) { cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79eb2e21e4f0..f36f7bebbc1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -2,6 +2,7 @@ #include <linux/slab.h> #include <linux/pci.h> #include <asm/apicdef.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/perf_event.h> #include 
"../perf_event.h" @@ -56,7 +57,10 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; - unsigned msr_offset; + union { + unsigned msr_offset; + unsigned mmio_offset; + }; unsigned num_shared_regs:8; unsigned single_fixed:1; unsigned pair_ctr_ctl:1; @@ -108,7 +112,7 @@ struct intel_uncore_extra_reg { struct intel_uncore_box { int pci_phys_id; - int pkgid; /* Logical package ID */ + int dieid; /* Logical die ID */ int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ @@ -125,7 +129,7 @@ struct intel_uncore_box { struct hrtimer hrtimer; struct list_head list; struct list_head active_list; - void *io_addr; + void __iomem *io_addr; struct intel_uncore_extra_reg shared_regs[0]; }; @@ -159,6 +163,7 @@ struct pci2phy_map { }; struct pci2phy_map *__find_pci2phy_map(int segment); +int uncore_pcibus_to_physid(struct pci_bus *bus); ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -190,6 +195,13 @@ static inline bool uncore_pmc_freerunning(int idx) return idx == UNCORE_PMC_IDX_FREERUNNING; } +static inline +unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * box->pmu->pmu_idx; +} + static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) { return box->pmu->type->box_ctl; @@ -330,7 +342,7 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) static inline unsigned uncore_fixed_ctl(struct intel_uncore_box *box) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_fixed_ctl(box); else return uncore_msr_fixed_ctl(box); @@ -339,7 +351,7 @@ unsigned uncore_fixed_ctl(struct intel_uncore_box *box) static inline unsigned uncore_fixed_ctr(struct intel_uncore_box *box) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_fixed_ctr(box); else return uncore_msr_fixed_ctr(box); @@ -348,7 +360,7 @@ unsigned 
uncore_fixed_ctr(struct intel_uncore_box *box) static inline unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_event_ctl(box, idx); else return uncore_msr_event_ctl(box, idx); @@ -357,7 +369,7 @@ unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) static inline unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_perf_ctr(box, idx); else return uncore_msr_perf_ctr(box, idx); @@ -419,6 +431,16 @@ static inline bool is_freerunning_event(struct perf_event *event) (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START); } +/* Check and reject invalid config */ +static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (is_freerunning_event(event)) + return 0; + + return -EINVAL; +} + static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -467,7 +489,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box) static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { - return (box->pkgid < 0); + return (box->dieid < 0); } static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) @@ -482,6 +504,9 @@ static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *ev struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); +void uncore_mmio_exit_box(struct intel_uncore_box *box); +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); void uncore_pmu_event_start(struct perf_event *event, int flags); @@ -497,6 +522,7 @@ u64 uncore_shared_reg_config(struct 
intel_uncore_box *box, int idx); extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; +extern struct intel_uncore_type **uncore_mmio_uncores; extern struct pci_driver *uncore_pci_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; @@ -528,6 +554,9 @@ int knl_uncore_pci_init(void); void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); +int snr_uncore_pci_init(void); +void snr_uncore_cpu_init(void); +void snr_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f8431819b3e1..dbaa1b088a30 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -3,27 +3,29 @@ #include "uncore.h" /* Uncore IMC PCI IDs */ -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 -#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 -#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 -#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c -#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 -#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 -#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f -#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c -#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 -#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 -#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f -#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f -#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc -#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 -#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 -#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 
+#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910 +#define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918 +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 #define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f #define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f #define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 @@ -34,9 +36,15 @@ #define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 #define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca #define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 +#define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d +#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34 +#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -420,11 +428,6 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } -static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) -{ - iounmap(box->io_addr); -} - static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) {} @@ 
-437,13 +440,6 @@ static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct per static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) {} -static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); -} - /* * Keep the custom event_init() function compatible with old event * encoding for free running counters. @@ -570,13 +566,13 @@ static struct pmu snb_uncore_imc_pmu = { static struct intel_uncore_ops snb_uncore_imc_ops = { .init_box = snb_uncore_imc_init_box, - .exit_box = snb_uncore_imc_exit_box, + .exit_box = uncore_mmio_exit_box, .enable_box = snb_uncore_imc_enable_box, .disable_box = snb_uncore_imc_disable_box, .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = snb_uncore_imc_read_counter, + .read_counter = uncore_mmio_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { @@ -682,6 +678,14 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, @@ -737,6 +741,26 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC), + .driver_data = 
UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -807,6 +831,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */ + IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */ IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ @@ -821,6 +847,11 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ + IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */ + IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */ + IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_4_UQ_IMC, 
&skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */ IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ { /* end marker */ } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b10e04387f38..b10a5ec79e48 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -324,12 +324,77 @@ #define SKX_M2M_PCI_PMON_CTR0 0x200 #define SKX_M2M_PCI_PMON_BOX_CTL 0x258 +/* SNR Ubox */ +#define SNR_U_MSR_PMON_CTR0 0x1f98 +#define SNR_U_MSR_PMON_CTL0 0x1f91 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94 + +/* SNR CHA */ +#define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff +#define SNR_CHA_MSR_PMON_CTL0 0x1c01 +#define SNR_CHA_MSR_PMON_CTR0 0x1c08 +#define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00 +#define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05 + + +/* SNR IIO */ +#define SNR_IIO_MSR_PMON_CTL0 0x1e08 +#define SNR_IIO_MSR_PMON_CTR0 0x1e01 +#define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00 +#define SNR_IIO_MSR_OFFSET 0x10 +#define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff + +/* SNR IRP */ +#define SNR_IRP0_MSR_PMON_CTL0 0x1ea8 +#define SNR_IRP0_MSR_PMON_CTR0 0x1ea1 +#define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0 +#define SNR_IRP_MSR_OFFSET 0x10 + +/* SNR M2PCIE */ +#define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58 +#define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51 +#define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50 +#define SNR_M2PCIE_MSR_OFFSET 0x10 + +/* SNR PCU */ +#define SNR_PCU_MSR_PMON_CTL0 0x1ef1 +#define SNR_PCU_MSR_PMON_CTR0 0x1ef8 +#define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0 +#define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc + +/* SNR M2M */ +#define SNR_M2M_PCI_PMON_CTL0 0x468 +#define SNR_M2M_PCI_PMON_CTR0 0x440 +#define SNR_M2M_PCI_PMON_BOX_CTL 0x438 +#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff + +/* SNR PCIE3 */ +#define SNR_PCIE3_PCI_PMON_CTL0 
0x508 +#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 +#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 + +/* SNR IMC */ +#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 +#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 +#define SNR_IMC_MMIO_PMON_CTL0 0x40 +#define SNR_IMC_MMIO_PMON_CTR0 0x8 +#define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800 +#define SNR_IMC_MMIO_OFFSET 0x4000 +#define SNR_IMC_MMIO_SIZE 0x4000 +#define SNR_IMC_MMIO_BASE_OFFSET 0xd0 +#define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF +#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 +#define SNR_IMC_MMIO_MEM0_MASK 0x7FF + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -343,11 +408,14 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47"); DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, fc_mask, "config:48-50"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9"); 
DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); @@ -1058,8 +1126,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - int pkg = box->pkgid; - struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; + int die = box->dieid; + struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx]; if (filter_pdev) { pci_write_config_dword(filter_pdev, reg1->reg, @@ -3585,6 +3653,7 @@ static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = { static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = { .read_counter = uncore_msr_read_counter, + .hw_config = uncore_freerunning_hw_config, }; static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = { @@ -3967,3 +4036,535 @@ int skx_uncore_pci_init(void) } /* end of SKX uncore support */ + +/* SNR uncore support */ + +static struct intel_uncore_type snr_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = SNR_U_MSR_PMON_CTR0, + .event_ctl = SNR_U_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct attribute *snr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext2.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group snr_uncore_chabox_format_group = { + .name = "format", + .attrs = snr_uncore_cha_formats_attr, +}; + +static int snr_cha_hw_config(struct intel_uncore_box 
*box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + reg1->reg = SNR_C0_MSR_PMON_BOX_FILTER0 + + box->pmu->type->msr_offset * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + + return 0; +} + +static void snr_cha_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snr_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = snr_cha_hw_config, +}; + +static struct intel_uncore_type snr_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = SNR_CHA_MSR_PMON_CTL0, + .perf_ctr = SNR_CHA_MSR_PMON_CTR0, + .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .ops = &snr_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static struct attribute *snr_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask2.attr, + &format_attr_fc_mask2.attr, + NULL, +}; + +static const struct attribute_group snr_uncore_iio_format_group = { + .name = "format", + .attrs = snr_uncore_iio_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 
48, + .event_ctl = SNR_IIO_MSR_PMON_CTL0, + .perf_ctr = SNR_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IIO_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &snr_uncore_iio_format_group, +}; + +static struct intel_uncore_type snr_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IRP0_MSR_PMON_CTL0, + .perf_ctr = SNR_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IRP_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0, + .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offset = SNR_M2PCIE_MSR_OFFSET, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops snr_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snr_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snr_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = 
SNR_PCU_MSR_PMON_CTR0, + .event_ctl = SNR_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snr_uncore_pcu_ops, + .format_group = &skx_uncore_pcu_format_group, +}; + +enum perf_uncore_snr_iio_freerunning_type_id { + SNR_IIO_MSR_IOCLK, + SNR_IIO_MSR_BW_IN, + + SNR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_iio_freerunning[] = { + [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 }, + [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + 
INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 5, + .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = snr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = snr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_msr_uncores[] = { + &snr_uncore_ubox, + &snr_uncore_chabox, + &snr_uncore_iio, + &snr_uncore_irp, + &snr_uncore_m2pcie, + &snr_uncore_pcu, + &snr_uncore_iio_free_running, + NULL, +}; + +void snr_uncore_cpu_init(void) +{ + uncore_msr_uncores = snr_msr_uncores; +} + +static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops snr_m2m_uncore_pci_ops = { + .init_box = snr_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct attribute *snr_m2m_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext3.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group snr_m2m_uncore_format_group = { + .name = "format", + .attrs = snr_m2m_uncore_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_m2m = { + .name = 
"m2m", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + .format_group = &snr_m2m_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_pcie3 = { + .name = "pcie3", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, + .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &ivbep_uncore_format_group, +}; + +enum { + SNR_PCI_UNCORE_M2M, + SNR_PCI_UNCORE_PCIE3, +}; + +static struct intel_uncore_type *snr_pci_uncores[] = { + [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, + [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, + NULL, +}; + +static const struct pci_device_id snr_uncore_pci_ids[] = { + { /* M2M */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), + }, + { /* PCIe3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snr_uncore_pci_driver = { + .name = "snr_uncore", + .id_table = snr_uncore_pci_ids, +}; + +int snr_uncore_pci_init(void) +{ + /* SNR UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = snr_pci_uncores; + uncore_pci_driver = &snr_uncore_pci_driver; + return 0; +} + +static struct pci_dev *snr_uncore_get_mc_dev(int id) +{ + struct pci_dev *mc_dev = NULL; + int phys_id, pkg; + + while (1) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); + if (!mc_dev) + break; + phys_id = uncore_pcibus_to_physid(mc_dev->bus); + if (phys_id < 0) + 
continue; + pkg = topology_phys_to_logical_pkg(phys_id); + if (pkg < 0) + continue; + else if (pkg == id) + break; + } + return mc_dev; +} + +static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); + unsigned int box_ctl = uncore_mmio_box_ctl(box); + resource_size_t addr; + u32 pci_dword; + + if (!pdev) + return; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); + addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); + addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; + + addr += box_ctl; + + box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE); + if (!box->io_addr) + return; + + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); +} + +static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config |= SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config | SNBEP_PMON_CTL_EN, + box->io_addr + hwc->config_base); +} + +static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops snr_uncore_mmio_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = 
snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct uncore_event_desc snr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = snr_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .ops = &snr_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_snr_imc_freerunning_type_id { + SNR_IMC_DCLK, + SNR_IMC_DDR, + + SNR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_imc_freerunning[] = { + [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), +}; + +static struct 
intel_uncore_ops snr_uncore_imc_freerunning_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type snr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 1, + .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = snr_imc_freerunning, + .ops = &snr_uncore_imc_freerunning_ops, + .event_descs = snr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_mmio_uncores[] = { + &snr_uncore_imc, + &snr_uncore_imc_free_running, + NULL, +}; + +void snr_uncore_mmio_init(void) +{ + uncore_mmio_uncores = snr_mmio_uncores; +} + +/* end of SNR uncore support */ diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index f3f4c2263501..9431447541e9 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <linux/sysfs.h> #include <linux/nospec.h> #include <asm/intel-family.h> +#include "probe.h" enum perf_msr_id { PERF_MSR_TSC = 0, @@ -12,32 +14,30 @@ enum perf_msr_id { PERF_MSR_PTSC = 5, PERF_MSR_IRPERF = 6, PERF_MSR_THERM = 7, - PERF_MSR_THERM_SNAP = 8, - PERF_MSR_THERM_UNIT = 9, PERF_MSR_EVENT_MAX, }; -static bool test_aperfmperf(int idx) +static bool test_aperfmperf(int idx, void *data) { return boot_cpu_has(X86_FEATURE_APERFMPERF); } -static bool test_ptsc(int idx) +static bool test_ptsc(int idx, void *data) { return boot_cpu_has(X86_FEATURE_PTSC); } -static bool test_irperf(int idx) +static bool test_irperf(int idx, void *data) { return boot_cpu_has(X86_FEATURE_IRPERF); } -static bool test_therm_status(int idx) +static bool test_therm_status(int idx, void *data) { return boot_cpu_has(X86_FEATURE_DTHERM); } -static bool test_intel(int idx) +static bool test_intel(int idx, void *data) { 
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) @@ -98,37 +98,51 @@ static bool test_intel(int idx) return false; } -struct perf_msr { - u64 msr; - struct perf_pmu_events_attr *attr; - bool (*test)(int idx); +PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" ); +PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" ); +PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" ); +PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" ); +PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" ); +PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" ); +PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" ); + +static unsigned long msr_mask; + +PMU_EVENT_GROUP(events, aperf); +PMU_EVENT_GROUP(events, mperf); +PMU_EVENT_GROUP(events, pperf); +PMU_EVENT_GROUP(events, smi); +PMU_EVENT_GROUP(events, ptsc); +PMU_EVENT_GROUP(events, irperf); + +static struct attribute *attrs_therm[] = { + &attr_therm.attr.attr, + &attr_therm_snap.attr.attr, + &attr_therm_unit.attr.attr, + NULL, }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" ); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" ); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" ); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" ); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" ); -PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" ); -PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" ); +static struct attribute_group group_therm = { + .name = "events", + .attrs = attrs_therm, +}; static struct perf_msr msr[] = { - 
[PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, - [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, - [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, - [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, - [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, - [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, + [PERF_MSR_TSC] = { .no_check = true, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, }, }; -static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { +static struct attribute *events_attrs[] = { + &attr_tsc.attr.attr, NULL, }; @@ -153,6 +167,17 @@ static const struct attribute_group *attr_groups[] = { NULL, }; +const struct attribute_group *attr_update[] = { + &group_aperf, + &group_mperf, + &group_pperf, + &group_smi, + &group_ptsc, + &group_irperf, + &group_therm, + NULL, +}; + static int msr_event_init(struct perf_event *event) { u64 cfg = event->attr.config; @@ -169,7 +194,7 @@ static int msr_event_init(struct perf_event *event) cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); - if (!msr[cfg].attr) + if (!(msr_mask & (1 << cfg))) return -EINVAL; event->hw.idx = -1; @@ -252,32 +277,17 @@ 
static struct pmu pmu_msr = { .stop = msr_event_stop, .read = msr_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .attr_update = attr_update, }; static int __init msr_init(void) { - int i, j = 0; - if (!boot_cpu_has(X86_FEATURE_TSC)) { pr_cont("no MSR PMU driver.\n"); return 0; } - /* Probe the MSRs. */ - for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { - u64 val; - - /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ - if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) - msr[i].attr = NULL; - } - - /* List remaining MSRs in the sysfs attrs. */ - for (i = 0; i < PERF_MSR_EVENT_MAX; i++) { - if (msr[i].attr) - events_attrs[j++] = &msr[i].attr->attr.attr; - } - events_attrs[j] = NULL; + msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL); perf_pmu_register(&pmu_msr, "msr", -1); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4e346856ee19..8751008fc170 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -613,14 +613,11 @@ struct x86_pmu { int attr_rdpmc_broken; int attr_rdpmc; struct attribute **format_attrs; - struct attribute **event_attrs; - struct attribute **caps_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); - struct attribute **cpu_events; + const struct attribute_group **attr_update; unsigned long attr_freeze_on_smi; - struct attribute **attrs; /* * CPU Hotplug hooks @@ -886,8 +883,6 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t intel_event_sysfs_show(char *page, u64 config); -struct attribute **merge_attr(struct attribute **a, struct attribute **b); - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c new file mode 100644 index 
000000000000..c2ede2f3b277 --- /dev/null +++ b/arch/x86/events/probe.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/types.h> +#include <linux/bits.h> +#include "probe.h" + +static umode_t +not_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return 0; +} + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) +{ + unsigned long avail = 0; + unsigned int bit; + u64 val; + + if (cnt >= BITS_PER_LONG) + return 0; + + for (bit = 0; bit < cnt; bit++) { + if (!msr[bit].no_check) { + struct attribute_group *grp = msr[bit].grp; + + grp->is_visible = not_visible; + + if (msr[bit].test && !msr[bit].test(bit, data)) + continue; + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ + if (rdmsrl_safe(msr[bit].msr, &val)) + continue; + /* Disable zero counters if requested. */ + if (!zero && !val) + continue; + + grp->is_visible = NULL; + } + avail |= BIT(bit); + } + + return avail; +} +EXPORT_SYMBOL_GPL(perf_msr_probe); diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h new file mode 100644 index 000000000000..4c8e0afc5fb5 --- /dev/null +++ b/arch/x86/events/probe.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_X86_EVENTS_PROBE_H__ +#define __ARCH_X86_EVENTS_PROBE_H__ +#include <linux/sysfs.h> + +struct perf_msr { + u64 msr; + struct attribute_group *grp; + bool (*test)(int idx, void *data); + bool no_check; +}; + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool no_zero, void *data); + +#define __PMU_EVENT_GROUP(_name) \ +static struct attribute *attrs_##_name[] = { \ + &attr_##_name.attr.attr, \ + NULL, \ +} + +#define PMU_EVENT_GROUP(_grp, _name) \ +__PMU_EVENT_GROUP(_name); \ +static struct attribute_group group_##_name = { \ + .name = #_grp, \ + .attrs = attrs_##_name, \ +} + +#endif /* __ARCH_X86_EVENTS_PROBE_H__ */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 
1d337c51f7e6..58acda503817 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -22,8 +22,8 @@ enum cpuid_leafs CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, - CPUID_F_0_EDX, - CPUID_F_1_EDX, + CPUID_LNX_4, + CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 75f27ee2c263..998c2cc08363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -239,12 +239,14 @@ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ #define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ @@ -269,13 +271,19 @@ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * 
CPUID levels like 0xf, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -322,6 +330,7 @@ #define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 310118805f57..0278aa66ef62 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -56,6 +56,7 @@ #define INTEL_FAM6_ICELAKE_XEON_D 0x6C #define INTEL_FAM6_ICELAKE_DESKTOP 0x7D #define INTEL_FAM6_ICELAKE_MOBILE 0x7E +#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* "Small Core" Processors (Atom) */ @@ -76,6 +77,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C 
/* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ + #define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 979ef971cc78..6b4fc2788078 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,6 +61,15 @@ #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c34a35c78618..3eab6ece52b4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -105,7 +105,7 @@ struct cpuinfo_x86 { int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ - u16 x86_max_cores; + u16 x86_max_cores; u16 apicid; u16 initial_apicid; u16 x86_clflush_size; @@ -117,6 +117,8 @@ struct cpuinfo_x86 { u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; + u16 cpu_die_id; + u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; @@ -144,7 +146,8 @@ enum cpuid_regs_idx { #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 -#define X86_VENDOR_NUM 10 +#define X86_VENDOR_ZHAOXIN 10 +#define X86_VENDOR_NUM 11 #define X86_VENDOR_UNKNOWN 0xff diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index da545df207b2..b673a226ad6c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h 
@@ -23,6 +23,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 453cf38a1c33..4b14d2318251 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) +#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #ifdef CONFIG_SMP +#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) +extern unsigned int __max_die_per_package; + +static inline int topology_max_die_per_package(void) +{ + return __max_die_per_package; +} + extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void) } int topology_update_package_map(unsigned int apicid, unsigned int cpu); +int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); #else #define topology_max_packages() (1) static inline 
int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +static inline int +topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_phys_to_logical_die(unsigned int die, + unsigned int cpu) { return 0; } +static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline bool topology_smt_supported(void) { return false; } diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index a5e5484988fd..caf2edccbad2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, c->x86_stepping >= 0x0e)) flags->bm_check = 1; } + + if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { + /* + * All Zhaoxin CPUs that support C3 share cache. + * And caches should not be flushed by software while + * entering C3 type state. + */ + flags->bm_check = 1; + /* + * On all recent Zhaoxin platforms, ARB_DISABLE is a nop. + * So, set bm_control to zero to indicate that ARB_DISABLE + * is not required while entering C3 type state. 
+ */ + flags->bm_control = 0; + } } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 5102bf7c8192..4b4eb06e117c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -24,6 +24,7 @@ obj-y += match.o obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o +obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o @@ -38,6 +39,7 @@ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o +obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index e71a6ff8a67e..e2f319dc992d 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -13,6 +13,7 @@ #include <linux/percpu.h> #include <linux/cpufreq.h> #include <linux/smp.h> +#include <linux/sched/isolation.h> #include "cpu.h" @@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); return per_cpu(samples.khz, cpu); } @@ -101,9 +105,12 @@ void arch_freq_prepare_all(void) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + continue; if (!aperfmperf_snapshot_cpu(cpu, now, false)) wait = true; + } if (wait) msleep(APERFMPERF_REFRESH_DELAY_MS); @@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) return per_cpu(samples.khz, cpu); diff --git 
a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 395d46f78582..c7503be92f35 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ per_cpu(cpu_llc_id, cpu) = node_id; - } else if (c->x86 == 0x17 && - c->x86_model >= 0 && c->x86_model <= 0x1F) { + } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. * Core complex ID is ApicId[3] for these processors. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c57fffebf9b..482f74859fb7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -801,6 +801,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c) } } +static void init_cqm(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + return; + } + + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = cpuid_ebx(0xf); + + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); + + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -823,6 +847,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; c->x86_capability[CPUID_7_EDX] = edx; + + /* Check valid sub-leaf index before accessing it */ + if (eax >= 1) { + cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_7_1_EAX] = eax; + } } /* Extended state features: level 0x0000000d */ @@ -832,33 +862,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) 
c->x86_capability[CPUID_D_1_EAX] = eax; } - /* Additional Intel-defined flags: level 0x0000000F */ - if (c->cpuid_level >= 0x0000000F) { - - /* QoS sub-leaf, EAX=0Fh, ECX=0 */ - cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_0_EDX] = edx; - - if (cpu_has(c, X86_FEATURE_CQM_LLC)) { - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = ebx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_1_EDX] = edx; - - if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || - ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || - (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } - } else { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - } - } - /* AMD-defined flags: level 0x80000001 */ eax = cpuid_eax(0x80000000); c->extended_cpuid_level = eax; @@ -889,6 +892,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + init_cqm(c); /* * Clear/Set all flags overridden by options, after probe. 
@@ -1299,6 +1303,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c) cpu, apicid, c->initial_apicid); } BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); + BUG_ON(topology_update_die_map(c->cpu_die_id, cpu)); #else c->logical_proc_id = 0; #endif diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 2c0bd38a44ab..a444028d8145 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -59,6 +59,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, {} }; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f17c1a714779..8d6d92ebeb54 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Processors which have self-snooping capability can handle conflicting + * memory type across CPUs by snooping its own cache. However, there exists + * CPU models in which having conflicting memory types still leads to + * unpredictable behavior, machine check errors, or hangs. Clear this + * feature to prevent its use on machines with known erratas. 
+ */ +static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) +{ + switch (c->x86_model) { + case INTEL_FAM6_CORE_YONAH: + case INTEL_FAM6_CORE2_MEROM: + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_G: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_SANDYBRIDGE: + setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); + } +} + static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) @@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_mpx_erratum(c); + check_memory_type_self_snoop_errata(c); /* * Get the number of SMT siblings early from the extended topology diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 9356c1c9024d..aa5c064a6a22 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); - wbinvd(); + + /* + * Cache flushing is the most time-consuming step when programming + * the MTRRs. Fortunately, as per the Intel Software Development + * Manual, we can skip it if the processor supports cache self- + * snooping. + */ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (boot_cpu_has(X86_FEATURE_PGE)) { @@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); - wbinvd(); + + /* Again, only flush caches if we have to. 
*/ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); } static void post_set(void) __releases(set_atomicity_lock) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 94aa1c72ca98..adf9b71386ef 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,10 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 8f6c784141d1..ee48c3fc8a65 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -15,33 +15,66 @@ /* leaf 0xb SMT level */ #define SMT_LEVEL 0 -/* leaf 0xb sub-leaf types */ +/* extended topology sub-leaf types */ #define INVALID_TYPE 0 #define SMT_TYPE 1 #define CORE_TYPE 2 +#define DIE_TYPE 5 #define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -int detect_extended_topology_early(struct cpuinfo_x86 *c) -{ #ifdef CONFIG_SMP +unsigned int __max_die_per_package __read_mostly = 1; +EXPORT_SYMBOL(__max_die_per_package); + +/* + * Check if given CPUID extended toplogy "leaf" is implemented + */ +static int check_extended_topology_leaf(int leaf) +{ unsigned int eax, ebx, ecx, edx; - if (c->cpuid_level < 0xb) + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) return -1; - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, 
&edx); + return 0; +} +/* + * Return best CPUID Extended Toplogy Leaf supported + */ +static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) +{ + if (c->cpuid_level >= 0x1f) { + if (check_extended_topology_leaf(0x1f) == 0) + return 0x1f; + } - /* - * check if the cpuid leaf 0xb is actually implemented. - */ - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + if (c->cpuid_level >= 0xb) { + if (check_extended_topology_leaf(0xb) == 0) + return 0xb; + } + + return -1; +} +#endif + +int detect_extended_topology_early(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx; + int leaf; + + leaf = detect_extended_topology_leaf(c); + if (leaf < 0) return -1; set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); /* * initial apic id, which also represents 32-bit extended x2apic id. */ @@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) } /* - * Check for extended topology enumeration cpuid leaf 0xb and if it + * Check for extended topology enumeration cpuid leaf, and if it * exists, use it for populating initial_apicid and cpu topology * detection. */ @@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; + unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; + unsigned int die_select_mask, die_level_siblings; + int leaf; - if (detect_extended_topology_early(c) < 0) + leaf = detect_extended_topology_leaf(c); + if (leaf < 0) return -1; /* * Populate HT related information from sub-leaf level 0. 
*/ - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + c->initial_apicid = edx; core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); /* * Check for the Core type in the implemented sub leaves. @@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c) if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - break; + die_level_siblings = core_level_siblings; + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + } + if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } sub_index++; } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) - & core_select_mask; - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); + die_select_mask = (~(-1 << die_plus_mask_width)) >> + core_plus_mask_width; + + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, + ht_mask_width) & core_select_mask; + c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, + core_plus_mask_width) & die_select_mask; + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, + die_plus_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. 
*/ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); + __max_die_per_package = (die_level_siblings / core_level_siblings); #endif return 0; } diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c new file mode 100644 index 000000000000..6a204e7336c1 --- /dev/null +++ b/arch/x86/kernel/cpu/umwait.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/syscore_ops.h> +#include <linux/suspend.h> +#include <linux/cpu.h> + +#include <asm/msr.h> + +#define UMWAIT_C02_ENABLE 0 + +#define UMWAIT_CTRL_VAL(max_time, c02_disable) \ + (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \ + ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE)) + +/* + * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, + * umwait max time is 100000 in TSC-quanta and C0.2 is enabled + */ +static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); + +/* + * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in + * the sysfs write functions. + */ +static DEFINE_MUTEX(umwait_lock); + +static void umwait_update_control_msr(void * unused) +{ + lockdep_assert_irqs_disabled(); + wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0); +} + +/* + * The CPU hotplug callback sets the control MSR to the global control + * value. + * + * Disable interrupts so the read of umwait_control_cached and the WRMSR + * are protected against a concurrent sysfs write. Otherwise the sysfs + * write could update the cached value after it had been read on this CPU + * and issue the IPI before the old value had been written. The IPI would + * interrupt, write the new value and after return from IPI the previous + * value would be written by this CPU. + * + * With interrupts disabled the upcoming CPU either sees the new control + * value or the IPI is updating this CPU to the new control value after + * interrupts have been reenabled. 
+ */ +static int umwait_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + umwait_update_control_msr(NULL); + local_irq_enable(); + return 0; +} + +/* + * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which + * is the only active CPU at this time. The MSR is set up on the APs via the + * CPU hotplug callback. + * + * This function is invoked on resume from suspend and hibernation. On + * resume from suspend the restore should be not required, but we neither + * trust the firmware nor does it matter if the same value is written + * again. + */ +static void umwait_syscore_resume(void) +{ + umwait_update_control_msr(NULL); +} + +static struct syscore_ops umwait_syscore_ops = { + .resume = umwait_syscore_resume, +}; + +/* sysfs interface */ + +/* + * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled. + * Otherwise, C0.2 is enabled. + */ +static inline bool umwait_ctrl_c02_enabled(u32 ctrl) +{ + return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE); +} + +static inline u32 umwait_ctrl_max_time(u32 ctrl) +{ + return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; +} + +static inline void umwait_update_control(u32 maxtime, bool c02_enable) +{ + u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; + + if (!c02_enable) + ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE; + + WRITE_ONCE(umwait_control_cached, ctrl); + /* Propagate to all CPUs */ + on_each_cpu(umwait_update_control_msr, NULL, 1); +} + +static ssize_t +enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(umwait_control_cached); + + return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl)); +} + +static ssize_t enable_c02_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool c02_enable; + u32 ctrl; + int ret; + + ret = kstrtobool(buf, &c02_enable); + if (ret) + return ret; + + mutex_lock(&umwait_lock); + + ctrl = READ_ONCE(umwait_control_cached); + if (c02_enable != 
umwait_ctrl_c02_enabled(ctrl)) + umwait_update_control(ctrl, c02_enable); + + mutex_unlock(&umwait_lock); + + return count; +} +static DEVICE_ATTR_RW(enable_c02); + +static ssize_t +max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(umwait_control_cached); + + return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl)); +} + +static ssize_t max_time_store(struct device *kobj, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 max_time, ctrl; + int ret; + + ret = kstrtou32(buf, 0, &max_time); + if (ret) + return ret; + + /* bits[1:0] must be zero */ + if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK) + return -EINVAL; + + mutex_lock(&umwait_lock); + + ctrl = READ_ONCE(umwait_control_cached); + if (max_time != umwait_ctrl_max_time(ctrl)) + umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl)); + + mutex_unlock(&umwait_lock); + + return count; +} +static DEVICE_ATTR_RW(max_time); + +static struct attribute *umwait_attrs[] = { + &dev_attr_enable_c02.attr, + &dev_attr_max_time.attr, + NULL +}; + +static struct attribute_group umwait_attr_group = { + .attrs = umwait_attrs, + .name = "umwait_control", +}; + +static int __init umwait_init(void) +{ + struct device *dev; + int ret; + + if (!boot_cpu_has(X86_FEATURE_WAITPKG)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online", + umwait_cpu_online, NULL); + + register_syscore_ops(&umwait_syscore_ops); + + /* + * Add umwait control interface. Ignore failure, so at least the + * default values are set up in case the machine manages to boot. 
+ */ + dev = cpu_subsys.dev_root; + return sysfs_create_group(&dev->kobj, &umwait_attr_group); +} +device_initcall(umwait_init); diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c new file mode 100644 index 000000000000..8e6f2f4b4afe --- /dev/null +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/sched.h> +#include <linux/sched/clock.h> + +#include <asm/cpufeature.h> + +#include "cpu.h" + +#define MSR_ZHAOXIN_FCR57 0x00001257 + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */ + +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + +static void init_zhaoxin_cap(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* Enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable ACE unit */ + lo |= ACE_FCR; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled ACE h/w crypto\n"); + } + + /* Enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable RNG unit */ + lo |= RNG_ENABLE; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled h/w RNG\n"); + } + + /* + * Store Extended Feature Flags as word 5 of the CPU + * capability bit array + */ + c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001); + } + + if (c->x86 >= 0x6) + 
set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + cpu_detect_cache_sizes(c); +} + +static void early_init_zhaoxin(struct cpuinfo_x86 *c) +{ + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#endif + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. + */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } + +} + +static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + +static void init_zhaoxin(struct cpuinfo_x86 *c) +{ + early_init_zhaoxin(c); + init_intel_cacheinfo(c); + detect_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + + if (c->cpuid_level > 9) { + unsigned int eax = cpuid_eax(10); + + /* + * Check for version and the number of counters + * 
Version(eax[7:0]) can't be 0; + * Counters(eax[15:8]) should be greater than 1; + */ + if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (c->x86 >= 0x6) + init_zhaoxin_cap(c); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#endif + + if (cpu_has(c, X86_FEATURE_VMX)) + zhaoxin_detect_vmx_virtcap(c); +} + +#ifdef CONFIG_X86_32 +static unsigned int +zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + return size; +} +#endif + +static const struct cpu_dev zhaoxin_cpu_dev = { + .c_vendor = "zhaoxin", + .c_ident = { " Shanghai " }, + .c_early_init = early_init_zhaoxin, + .c_init = init_zhaoxin, +#ifdef CONFIG_X86_32 + .legacy_cache_size = zhaoxin_size_cache, +#endif + .c_x86_vendor = X86_VENDOR_ZHAOXIN, +}; + +cpu_dev_register(zhaoxin_cpu_dev); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a166c960bc9e..3108cdc00b29 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -397,22 +397,12 @@ static int putreg(struct task_struct *child, case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_MAX) return -EIO; - /* - * When changing the FS base, use do_arch_prctl_64() - * to set the index to zero and to set the base - * as requested. - */ - if (child->thread.fsbase != value) - return do_arch_prctl_64(child, ARCH_SET_FS, value); + x86_fsbase_write_task(child, value); return 0; case offsetof(struct user_regs_struct,gs_base): - /* - * Exactly the same here as the %fs handling above. 
- */ if (value >= TASK_SIZE_MAX) return -EIO; - if (child->thread.gsbase != value) - return do_arch_prctl_64(child, ARCH_SET_GS, value); + x86_gsbase_write_task(child, value); return 0; #endif } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 362dd8953f48..e4f4f3c5ccd2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); +/* representing HT, core, and die siblings of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); +EXPORT_PER_CPU_SYMBOL(cpu_die_map); + DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Per CPU bogomips and other parameters */ @@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; +static unsigned int logical_die __read_mostly; /* Maximum number of SMT threads on any online core */ int __read_mostly __max_smt_threads = 1; @@ -300,6 +305,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) return -1; } EXPORT_SYMBOL(topology_phys_to_logical_pkg); +/** + * topology_phys_to_logical_die - Map a physical die id to logical + * + * Returns logical die id or -1 if not found + */ +int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) +{ + int cpu; + int proc_id = cpu_data(cur_cpu).phys_proc_id; + + for_each_possible_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && c->cpu_die_id == die_id && + c->phys_proc_id == proc_id) + return c->logical_die_id; + } + return -1; +} +EXPORT_SYMBOL(topology_phys_to_logical_die); /** * topology_update_package_map - Update the physical to logical package map @@ -324,6 +349,29 @@ found: cpu_data(cpu).logical_proc_id = new; return 0; } +/** + * topology_update_die_map - Update the physical to 
logical die map + * @die: The die id as retrieved via CPUID + * @cpu: The cpu for which this is updated + */ +int topology_update_die_map(unsigned int die, unsigned int cpu) +{ + int new; + + /* Already available somewhere? */ + new = topology_phys_to_logical_die(die, cpu); + if (new >= 0) + goto found; + + new = logical_die++; + if (new != die) { + pr_info("CPU %u Converting physical %u to logical die %u\n", + cpu, die, new); + } +found: + cpu_data(cpu).logical_die_id = new; + return 0; +} void __init smp_store_boot_cpu_info(void) { @@ -333,6 +381,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; topology_update_package_map(c->phys_proc_id, id); + topology_update_die_map(c->cpu_die_id, id); c->initialized = true; } @@ -387,6 +436,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && + c->cpu_die_id == o->cpu_die_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { if (c->cpu_core_id == o->cpu_core_id) return topology_sane(c, o, "smt"); @@ -398,6 +448,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) } } else if (c->phys_proc_id == o->phys_proc_id && + c->cpu_die_id == o->cpu_die_id && c->cpu_core_id == o->cpu_core_id) { return topology_sane(c, o, "smt"); } @@ -460,6 +511,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } +static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + if ((c->phys_proc_id == o->phys_proc_id) && + (c->cpu_die_id == o->cpu_die_id)) + return true; + return false; +} + + #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) static inline int x86_sched_itmt_flags(void) { @@ -522,6 +582,7 @@ void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); + cpumask_set_cpu(cpu, 
topology_die_cpumask(cpu)); c->booted_cores = 1; return; } @@ -570,6 +631,9 @@ void set_cpu_sibling_map(int cpu) } if (match_pkg(c, o) && !topology_same_node(c, o)) x86_has_numa_in_package = true; + + if ((i == cpu) || (has_mp && match_die(c, o))) + link_mask(topology_die_cpumask, cpu, i); } threads = cpumask_weight(topology_sibling_cpumask(cpu)); @@ -1174,6 +1238,7 @@ static __init void disable_smp(void) physid_set_mask_of_physid(0, &phys_cpu_present_map); cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); + cpumask_set_cpu(0, topology_die_cpumask(0)); } /* @@ -1269,6 +1334,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } @@ -1489,6 +1555,8 @@ static void remove_siblinginfo(int cpu) cpu_data(sibling).booted_cores--; } + for_each_cpu(sibling, topology_die_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_die_cpumask(sibling)); for_each_cpu(sibling, topology_sibling_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) @@ -1496,6 +1564,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); + cpumask_clear(topology_die_cpumask(cpu)); c->cpu_core_id = 0; c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9a327d5b6d1f..d78a61408243 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, 
CPUID_EAX}, - [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, - [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 590fcf863006..77d81c1a63e9 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -251,6 +251,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } set_cpu_sibling_map(0); |