Diffstat (limited to 'arch/x86/kernel')
36 files changed, 771 insertions, 1161 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3e625c61f008..8f4e8fa6ed75 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -136,9 +136,6 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o -obj-y += sysfb.o -obj-$(CONFIG_X86_SYSFB) += sysfb_simplefb.o -obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e55e0c1fad8c..14bcd59bcdee 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -558,10 +558,10 @@ acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end * If a PIC-mode SCI is not recognized or gives spurious IRQ7's * it may require Edge Trigger -- use "acpi_sci=edge" * - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers + * Port 0x4d0-4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0) + * ELCR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0) + * ELCR2 is IRQs 8-15 (IRQ 8, 13 must be 0) */ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) @@ -570,7 +570,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) unsigned int old, new; /* Real old ELCR mask */ - old = inb(0x4d0) | (inb(0x4d1) << 8); + old = inb(PIC_ELCR1) | (inb(PIC_ELCR2) << 8); /* * If we use ACPI to set PCI IRQs, then we should clear ELCR @@ -596,8 +596,8 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) return; pr_warn("setting ELCR to %04x (from %04x)\n", new, old); - outb(new, 0x4d0); - outb(new >> 8, 0x4d1); + outb(new, PIC_ELCR1); + outb(new >> 8, PIC_ELCR2); } int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 9ac696487b13..ed837383de5c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -331,7 +331,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int i; if (iommu_start == -1) - return -1; + return -ENOMEM; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; @@ -380,13 +380,13 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *s, *ps, *start_sg, *sgmap; - int need = 0, nextneed, i, out, start; + int need = 0, nextneed, i, out, start, ret; unsigned long pages = 0; unsigned int seg_size; unsigned int max_seg_size; if (nents == 0) - return 0; + return -EINVAL; out = 0; start = 0; @@ -414,8 +414,9 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!iommu_merge || !nextneed || !need || s->offset || (s->length + seg_size > max_seg_size) || (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(dev, start_sg, i - start, - sgmap, pages, need) < 0) + ret = dma_map_cont(dev, start_sg, i - start, + sgmap, pages, need); + if (ret < 0) goto error; out++; @@ -432,7 +433,8 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); ps = s; } - if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) + ret = dma_map_cont(dev, start_sg, i - start, sgmap, pages, need); + if (ret < 0) goto error; 
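/*
 * Illustrative sketch, not part of the diff: the acpi/boot.c and
 * io_apic.c hunks above replace the magic ports 0x4d0/0x4d1 with the
 * PIC_ELCR1/PIC_ELCR2 constants this series introduces (assumed here
 * to come from <asm/i8259.h>, defined as 0x4d0 and 0x4d1). A minimal
 * read-side illustration; the helper name is hypothetical.
 */
#include <linux/types.h>
#include <asm/io.h>
#include <asm/i8259.h>

static bool irq_is_level_triggered(unsigned int irq)
{
        /* The two 8-bit ELCR ports form one 16-bit mask over IRQ 0-15 */
        unsigned int elcr = inb(PIC_ELCR1) | (inb(PIC_ELCR2) << 8);

        return (elcr >> irq) & 1;       /* bit[n] == 1 => IRQ n is level */
}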
out++; flush_gart(); @@ -456,9 +458,7 @@ error: panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) - s->dma_address = DMA_MAPPING_ERROR; - return 0; + return ret; } /* allocate and map a coherent mapping */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 23dda362dc0f..c92c9c774c0e 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -25,6 +25,8 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 +#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 +#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e /* Protect the PCI config register pairs used for SMN and DF indirect access. */ @@ -37,6 +39,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, {} }; @@ -58,6 +61,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, {} }; @@ -74,6 +78,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 294ed4392a0e..10562885f5fc 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -109,14 +109,13 @@ static u32 __init allocate_aperture(void) * memory. Unfortunately we cannot move it up because that would * make the IOMMU useless. 
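/*
 * A minimal sketch of the memblock API change shown in the
 * allocate_aperture() hunk below: the old find + reserve pair becomes
 * a single allocating call. Signature per <linux/memblock.h>; the
 * wrapper name is hypothetical.
 */
#include <linux/memblock.h>

static phys_addr_t alloc_reserved_hole(phys_addr_t size, phys_addr_t align,
                                       phys_addr_t min_addr,
                                       phys_addr_t max_addr)
{
        /*
         * Old two-step form:
         *   addr = memblock_find_in_range(min_addr, max_addr, size, align);
         *   if (addr)
         *           memblock_reserve(addr, size);
         * New form, which both finds and reserves the range:
         */
        return memblock_phys_alloc_range(size, align, min_addr, max_addr);
}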
*/ - addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, - aper_size, aper_size); + addr = memblock_phys_alloc_range(aper_size, aper_size, + GART_MIN_ADDR, GART_MAX_ADDR); if (!addr) { pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n", addr, addr + aper_size - 1, aper_size >> 10); return 0; } - memblock_reserve(addr, aper_size); pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n", addr, addr + aper_size - 1, aper_size >> 10); register_nosave_region(addr >> PAGE_SHIFT, diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d262811ce14b..b70344bf6600 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -38,6 +38,7 @@ #include <asm/trace/irq_vectors.h> #include <asm/irq_remapping.h> +#include <asm/pc-conf-reg.h> #include <asm/perf_event.h> #include <asm/x86_init.h> #include <linux/atomic.h> @@ -132,18 +133,14 @@ static int enabled_via_apicbase __ro_after_init; */ static inline void imcr_pic_to_apic(void) { - /* select IMCR register */ - outb(0x70, 0x22); /* NMI and 8259 INTR go through APIC */ - outb(0x01, 0x23); + pc_conf_set(PC_CONF_MPS_IMCR, 0x01); } static inline void imcr_apic_to_pic(void) { - /* select IMCR register */ - outb(0x70, 0x22); /* NMI and 8259 INTR go directly to BSP */ - outb(0x00, 0x23); + pc_conf_set(PC_CONF_MPS_IMCR, 0x00); } #endif diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 39224e035e47..c1bb384935b0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -764,7 +764,7 @@ static bool irq_active_low(int idx) static bool EISA_ELCR(unsigned int irq) { if (irq < nr_legacy_irqs()) { - unsigned int port = 0x4d0 + (irq >> 3); + unsigned int port = PIC_ELCR1 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } apic_printk(APIC_VERBOSE, KERN_INFO diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index fb67ed5e7e6a..c132daabe615 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -1299,7 +1299,7 @@ static void __init print_PIC(void) pr_debug("... PIC ISR: %04x\n", v); - v = inb(0x4d1) << 8 | inb(0x4d0); + v = inb(PIC_ELCR2) << 8 | inb(PIC_ELCR1); pr_debug("... PIC ELCR: %04x\n", v); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b7c003013d41..2131af9f2fa2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -438,7 +438,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) - node = per_cpu(cpu_llc_id, cpu); + node = get_llc_id(cpu); /* * On multi-fabric platform (e.g. Numascale NumaChip) a diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d41b70fe4918..ecfca3bbcd96 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void); static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); static void __init srbds_select_mitigation(void); +static void __init l1d_flush_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. 
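/*
 * Sketch of the helper behind the IMCR conversion in apic.c above:
 * port 0x22 (index) and port 0x23 (data) form the PC configuration
 * register pair, and register 0x70 selects the IMCR. Constants follow
 * this series' <asm/pc-conf-reg.h>; note the in-tree pc_conf_set()
 * also serializes with a spinlock, omitted here for brevity.
 */
#include <linux/types.h>
#include <asm/io.h>

#define PC_CONF_INDEX           0x22
#define PC_CONF_DATA            0x23
#define PC_CONF_MPS_IMCR        0x70

static void pc_conf_set_sketch(u8 reg, u8 val)
{
        outb(reg, PC_CONF_INDEX);       /* select the register */
        outb(val, PC_CONF_DATA);        /* write its new value */
}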
*/ u64 x86_spec_ctrl_base; @@ -76,6 +77,13 @@ EXPORT_SYMBOL_GPL(mds_user_clear); DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); +/* + * Controls whether l1d flush based mitigations are enabled, + * based on hw features and admin setting via boot parameter + * defaults to false + */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + void __init check_bugs(void) { identify_boot_cpu(); @@ -111,6 +119,7 @@ void __init check_bugs(void) mds_select_mitigation(); taa_select_mitigation(); srbds_select_mitigation(); + l1d_flush_select_mitigation(); /* * As MDS and TAA mitigations are inter-related, print MDS @@ -492,6 +501,34 @@ static int __init srbds_parse_cmdline(char *str) early_param("srbds", srbds_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "L1D Flush : " fmt + +enum l1d_flush_mitigations { + L1D_FLUSH_OFF = 0, + L1D_FLUSH_ON, +}; + +static enum l1d_flush_mitigations l1d_flush_mitigation __initdata = L1D_FLUSH_OFF; + +static void __init l1d_flush_select_mitigation(void) +{ + if (!l1d_flush_mitigation || !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) + return; + + static_branch_enable(&switch_mm_cond_l1d_flush); + pr_info("Conditional flush on switch_mm() enabled\n"); +} + +static int __init l1d_flush_parse_cmdline(char *str) +{ + if (!strcmp(str, "on")) + l1d_flush_mitigation = L1D_FLUSH_ON; + + return 0; +} +early_param("l1d_flush", l1d_flush_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -1215,6 +1252,24 @@ static void task_update_spec_tif(struct task_struct *tsk) speculation_ctrl_update_current(); } +static int l1d_flush_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + + if (!static_branch_unlikely(&switch_mm_cond_l1d_flush)) + return -EPERM; + + switch (ctrl) { + case PR_SPEC_ENABLE: + set_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH); + return 0; + case PR_SPEC_DISABLE: + clear_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH); + return 0; + default: + return -ERANGE; + } +} + static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && @@ -1324,6 +1379,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, return ssb_prctl_set(task, ctrl); case PR_SPEC_INDIRECT_BRANCH: return ib_prctl_set(task, ctrl); + case PR_SPEC_L1D_FLUSH: + return l1d_flush_prctl_set(task, ctrl); default: return -ENODEV; } @@ -1340,6 +1397,17 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) } #endif +static int l1d_flush_prctl_get(struct task_struct *task) +{ + if (!static_branch_unlikely(&switch_mm_cond_l1d_flush)) + return PR_SPEC_FORCE_DISABLE; + + if (test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH)) + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + else + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; +} + static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { @@ -1390,6 +1458,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return ssb_prctl_get(task); case PR_SPEC_INDIRECT_BRANCH: return ib_prctl_get(task); + case PR_SPEC_L1D_FLUSH: + return l1d_flush_prctl_get(task); default: return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index d66af2950e06..b5e36bd0425b 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int 
init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64b805bd6a54..0f8885949e8c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -79,6 +79,12 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; +u16 get_llc_id(unsigned int cpu) +{ + return per_cpu(cpu_llc_id, cpu); +} +EXPORT_SYMBOL_GPL(get_llc_id); + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 22791aadc085..8cb7816d03b4 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -817,7 +817,10 @@ log_it: if (mca_cfg.dont_log_ce && !mce_usable_address(&m)) goto clear_it; - mce_log(&m); + if (flags & MCP_QUEUE_LOG) + mce_gen_pool_add(&m); + else + mce_log(&m); clear_it: /* @@ -1639,10 +1642,12 @@ static void __mcheck_cpu_init_generic(void) m_fl = MCP_DONTLOG; /* - * Log the machine checks left over from the previous reset. + * Log the machine checks left over from the previous reset. Log them + * only, do not start processing them. That will happen in mcheck_late_init() + * when all consumers have been registered on the notifier chain. */ bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC | m_fl, &all_banks); + machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks); cr4_set_bits(X86_CR4_MCE); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 4e86d97f9653..0bfc14041bbb 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -235,7 +235,7 @@ static void __maybe_unused raise_mce(struct mce *m) unsigned long start; int cpu; - get_online_cpus(); + cpus_read_lock(); cpumask_copy(mce_inject_cpumask, cpu_online_mask); cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); for_each_online_cpu(cpu) { @@ -269,7 +269,7 @@ static void __maybe_unused raise_mce(struct mce *m) } raise_local(); put_cpu(); - put_online_cpus(); + cpus_read_unlock(); } else { preempt_disable(); raise_local(); @@ -529,7 +529,7 @@ static void do_inject(void) cpu = get_nbc_for_node(topology_die_id(cpu)); } - get_online_cpus(); + cpus_read_lock(); if (!cpu_online(cpu)) goto err; @@ -553,7 +553,7 @@ static void do_inject(void) } err: - put_online_cpus(); + cpus_read_unlock(); } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6a6318e9590c..efb69be41ab1 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -55,7 +55,7 @@ LIST_HEAD(microcode_cache); * All non cpu-hotplug-callback call sites use: * * - microcode_mutex to synchronize with each other; - * - get/put_online_cpus() to synchronize with + * - cpus_read_lock/unlock() to synchronize with * the cpu-hotplug-callback call sites. 
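/*
 * The conversions in this file (and in mce/inject.c and mtrr.c) are a
 * mechanical rename: get_online_cpus()/put_online_cpus() were the old
 * spellings of the CPU-hotplug read lock. A minimal sketch of the
 * resulting pattern; the iteration body is hypothetical.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>

static void visit_online_cpus(void (*fn)(unsigned int cpu))
{
        unsigned int cpu;

        cpus_read_lock();               /* was: get_online_cpus() */
        for_each_online_cpu(cpu)
                fn(cpu);
        cpus_read_unlock();             /* was: put_online_cpus() */
}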
* * We guarantee that only a single cpu is being @@ -431,7 +431,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, return ret; } - get_online_cpus(); + cpus_read_lock(); mutex_lock(&microcode_mutex); if (do_microcode_update(buf, len) == 0) @@ -441,7 +441,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, perf_check_microcode(); mutex_unlock(&microcode_mutex); - put_online_cpus(); + cpus_read_unlock(); return ret; } @@ -629,7 +629,7 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; - get_online_cpus(); + cpus_read_lock(); ret = check_online_cpus(); if (ret) @@ -644,7 +644,7 @@ static ssize_t reload_store(struct device *dev, mutex_unlock(&microcode_mutex); put: - put_online_cpus(); + cpus_read_unlock(); if (ret == 0) ret = size; @@ -853,14 +853,14 @@ static int __init microcode_init(void) if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); - get_online_cpus(); + cpus_read_lock(); mutex_lock(&microcode_mutex); error = subsys_interface_register(&mc_cpu_interface); if (!error) perf_check_microcode(); mutex_unlock(&microcode_mutex); - put_online_cpus(); + cpus_read_unlock(); if (error) goto out_pdev; @@ -892,13 +892,13 @@ static int __init microcode_init(void) &cpu_root_microcode_group); out_driver: - get_online_cpus(); + cpus_read_lock(); mutex_lock(&microcode_mutex); subsys_interface_unregister(&mc_cpu_interface); mutex_unlock(&microcode_mutex); - put_online_cpus(); + cpus_read_unlock(); out_pdev: platform_device_unregister(microcode_pdev); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c890d67a64ad..e095c28d27ae 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,7 +17,6 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/i8253.h> -#include <linux/panic_notifier.h> #include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> @@ -36,10 +35,7 @@ /* Is Linux running as the root partition? */ bool hv_root_partition; -EXPORT_SYMBOL_GPL(hv_root_partition); - struct ms_hyperv_info ms_hyperv; -EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); @@ -65,14 +61,12 @@ void hv_setup_vmbus_handler(void (*handler)(void)) { vmbus_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); void hv_remove_vmbus_handler(void) { /* We have no way to deallocate the interrupt gate */ vmbus_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); /* * Routines to do per-architecture handling of stimer0 @@ -107,25 +101,21 @@ void hv_setup_kexec_handler(void (*handler)(void)) { hv_kexec_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); void hv_remove_kexec_handler(void) { hv_kexec_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) { hv_crash_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_crash_handler); void hv_remove_crash_handler(void) { hv_crash_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_crash_handler); #ifdef CONFIG_KEXEC_CORE static void hv_machine_shutdown(void) @@ -335,16 +325,6 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.nested_features); } - /* - * Hyper-V expects to get crash register data or kmsg when - * crash enlightment is available and system crashes. Set - * crash_kexec_post_notifiers to be true to make sure that - * calling crash enlightment interface before running kdump - * kernel.
- */ - if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) - crash_kexec_post_notifiers = true; - #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { @@ -373,10 +353,17 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { + /* + * Writing to synthetic MSR 0x40000118 updates/changes the + * guest visible CPUIDs. Setting bit 0 of this MSR enables + * guests to report invariant TSC feature through CPUID + * instruction, CPUID 0x800000007/EDX, bit 8. See code in + * early_init_intel() where this bit is examined. The + * setting of this MSR bit should happen before init_intel() + * is called. + */ wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1); setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); - } else { - mark_tsc_unstable("running on Hyper-V"); } /* @@ -437,6 +424,13 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); #endif + /* + * TSC should be marked as unstable only after Hyper-V + * clocksource has been initialized. This ensures that the + * stability of the sched_clock is not altered. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + mark_tsc_unstable("running on Hyper-V"); } static bool __init ms_hyperv_x2apic_available(void) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index a76694bffe86..2746cac9d8a9 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -336,7 +336,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, replace = -1; /* No CPU hotplug when we change MTRR entries */ - get_online_cpus(); + cpus_read_lock(); /* Search for existing MTRR */ mutex_lock(&mtrr_mutex); @@ -398,7 +398,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: mutex_unlock(&mtrr_mutex); - put_online_cpus(); + cpus_read_unlock(); return error; } @@ -485,7 +485,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ - get_online_cpus(); + cpus_read_lock(); mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ @@ -520,7 +520,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) error = reg; out: mutex_unlock(&mtrr_mutex); - put_online_cpus(); + cpus_read_unlock(); return error; } diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 23001ae03e82..4b8813bafffd 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -57,128 +57,57 @@ static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) +#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) -struct rdt_resource rdt_resources_all[] = { +struct rdt_hw_resource rdt_resources_all[] = { [RDT_RESOURCE_L3] = { - .rid = RDT_RESOURCE_L3, - .name = "L3", - .domains = domain_init(RDT_RESOURCE_L3), - .msr_base = MSR_IA32_L3_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 1, - .cbm_idx_offset = 0, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L3DATA] = - { - .rid = RDT_RESOURCE_L3DATA, - .name = "L3DATA", - .domains = 
domain_init(RDT_RESOURCE_L3DATA), - .msr_base = MSR_IA32_L3_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 0, + .r_resctrl = { + .rid = RDT_RESOURCE_L3, + .name = "L3", + .cache_level = 3, + .cache = { + .min_cbm_bits = 1, + }, + .domains = domain_init(RDT_RESOURCE_L3), + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L3CODE] = - { - .rid = RDT_RESOURCE_L3CODE, - .name = "L3CODE", - .domains = domain_init(RDT_RESOURCE_L3CODE), .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, - .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 1, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, }, [RDT_RESOURCE_L2] = { - .rid = RDT_RESOURCE_L2, - .name = "L2", - .domains = domain_init(RDT_RESOURCE_L2), - .msr_base = MSR_IA32_L2_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 1, - .cbm_idx_offset = 0, + .r_resctrl = { + .rid = RDT_RESOURCE_L2, + .name = "L2", + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + }, + .domains = domain_init(RDT_RESOURCE_L2), + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L2DATA] = - { - .rid = RDT_RESOURCE_L2DATA, - .name = "L2DATA", - .domains = domain_init(RDT_RESOURCE_L2DATA), .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, - .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 0, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L2CODE] = - { - .rid = RDT_RESOURCE_L2CODE, - .name = "L2CODE", - .domains = domain_init(RDT_RESOURCE_L2CODE), - .msr_base = MSR_IA32_L2_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 1, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, }, [RDT_RESOURCE_MBA] = { - .rid = RDT_RESOURCE_MBA, - .name = "MB", - .domains = domain_init(RDT_RESOURCE_MBA), - .cache_level = 3, - .parse_ctrlval = parse_bw, - .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, + .r_resctrl = { + .rid = RDT_RESOURCE_MBA, + .name = "MB", + .cache_level = 3, + .domains = domain_init(RDT_RESOURCE_MBA), + .parse_ctrlval = parse_bw, + .format_str = "%d=%*u", + .fflags = RFTYPE_RES_MB, + }, }, }; -static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid) -{ - return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset; -} - /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. 
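/*
 * The rdt_resources_all[] rework above embeds the generic struct
 * rdt_resource inside an arch-private struct rdt_hw_resource. A
 * condensed sketch of the container_of() round trip the rest of the
 * patch leans on, with the types trimmed to the fields needed here:
 */
#include <linux/types.h>
#include <linux/kernel.h>

struct rdt_resource {
        int rid;
        /* ... fields shared with the resctrl filesystem code ... */
};

struct rdt_hw_resource {
        struct rdt_resource r_resctrl;  /* generic, visible to fs code */
        u32 num_closid;                 /* arch-private */
};

static inline struct rdt_hw_resource *
resctrl_to_arch_res(struct rdt_resource *r)
{
        return container_of(r, struct rdt_hw_resource, r_resctrl);
}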
@@ -199,7 +128,8 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid) */ static inline void cache_alloc_hsw_probe(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r = &hw_res->r_resctrl; u32 l, h, max_cbm = BIT_MASK(20) - 1; if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0)) @@ -211,7 +141,7 @@ static inline void cache_alloc_hsw_probe(void) if (l != max_cbm) return; - r->num_closid = 4; + hw_res->num_closid = 4; r->default_ctrl = max_cbm; r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; @@ -225,7 +155,7 @@ static inline void cache_alloc_hsw_probe(void) bool is_mba_sc(struct rdt_resource *r) { if (!r) - return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc; + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; return r->membw.mba_sc; } @@ -253,12 +183,13 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r) static bool __get_mem_config_intel(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; u32 ebx, ecx, max_delay; cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); - r->num_closid = edx.split.cos_max + 1; + hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; r->membw.arch_needs_linear = true; @@ -287,12 +218,13 @@ static bool __get_mem_config_intel(struct rdt_resource *r) static bool __rdt_get_mem_config_amd(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; u32 ebx, ecx; cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full); - r->num_closid = edx.split.cos_max + 1; + hw_res->num_closid = edx.split.cos_max + 1; r->default_ctrl = MAX_MBA_BW_AMD; /* AMD does not use delay */ @@ -317,12 +249,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_1_eax eax; union cpuid_0x10_x_edx edx; u32 ebx, ecx; cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full); - r->num_closid = edx.split.cos_max + 1; + hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; r->cache.shareable_bits = ebx & r->default_ctrl; @@ -331,43 +264,35 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->alloc_enabled = true; } -static void rdt_get_cdp_config(int level, int type) +static void rdt_get_cdp_config(int level) { - struct rdt_resource *r_l = &rdt_resources_all[level]; - struct rdt_resource *r = &rdt_resources_all[type]; - - r->num_closid = r_l->num_closid / 2; - r->cache.cbm_len = r_l->cache.cbm_len; - r->default_ctrl = r_l->default_ctrl; - r->cache.shareable_bits = r_l->cache.shareable_bits; - r->data_width = (r->cache.cbm_len + 3) / 4; - r->alloc_capable = true; /* * By default, CDP is disabled. CDP can be enabled by mount parameter * "cdp" during resctrl file system mount time. 
*/ - r->alloc_enabled = false; + rdt_resources_all[level].cdp_enabled = false; + rdt_resources_all[level].r_resctrl.cdp_capable = true; } static void rdt_get_cdp_l3_config(void) { - rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA); - rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE); + rdt_get_cdp_config(RDT_RESOURCE_L3); } static void rdt_get_cdp_l2_config(void) { - rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA); - rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); + rdt_get_cdp_config(RDT_RESOURCE_L2); } static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) - wrmsrl(r->msr_base + i, d->ctrl_val[i]); + wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } /* @@ -389,19 +314,23 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); /* Write the delay values for mba. */ for (i = m->low; i < m->high; i++) - wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r)); + wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r)); } static void cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) - wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]); + wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) @@ -417,16 +346,22 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) return NULL; } +u32 resctrl_arch_get_num_closid(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->num_closid; +} + void rdt_ctrl_update(void *arg) { struct msr_param *m = arg; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); struct rdt_resource *r = m->res; int cpu = smp_processor_id(); struct rdt_domain *d; d = get_domain_from_cpu(cpu, r); if (d) { - r->msr_update(d, m, r); + hw_res->msr_update(d, m, r); return; } pr_warn_once("cpu %d not found in any domain for resource %s\n", @@ -468,6 +403,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); int i; /* @@ -476,7 +412,7 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) * For Memory Allocation: Set b/w requested to 100% * and the bandwidth in MBps to U32_MAX */ - for (i = 0; i < r->num_closid; i++, dc++, dm++) { + for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) { *dc = r->default_ctrl; *dm = MBA_MAX_MBPS; } @@ -484,26 +420,30 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; u32 *dc, *dm; - dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL); + dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val), + GFP_KERNEL); if (!dc) return -ENOMEM; - dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL); + dm = 
kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val), + GFP_KERNEL); if (!dm) { kfree(dc); return -ENOMEM; } - d->ctrl_val = dc; - d->mbps_val = dm; + hw_dom->ctrl_val = dc; + hw_dom->mbps_val = dm; setup_default_ctrlval(r, dc, dm); m.low = 0; - m.high = r->num_closid; - r->msr_update(d, &m, r); + m.high = hw_res->num_closid; + hw_res->msr_update(d, &m, r); return 0; } @@ -560,6 +500,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) { int id = get_cpu_cacheinfo_id(cpu, r->cache_level); struct list_head *add_pos = NULL; + struct rdt_hw_domain *hw_dom; struct rdt_domain *d; d = rdt_find_domain(r, id, &add_pos); @@ -575,10 +516,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; } - d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); - if (!d) + hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); + if (!hw_dom) return; + d = &hw_dom->d_resctrl; d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); @@ -607,6 +549,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) static void domain_remove_cpu(int cpu, struct rdt_resource *r) { int id = get_cpu_cacheinfo_id(cpu, r->cache_level); + struct rdt_hw_domain *hw_dom; struct rdt_domain *d; d = rdt_find_domain(r, id, NULL); @@ -614,6 +557,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) pr_warn("Couldn't find cache id for CPU %d\n", cpu); return; } + hw_dom = resctrl_to_arch_dom(d); cpumask_clear_cpu(cpu, &d->cpu_mask); if (cpumask_empty(&d->cpu_mask)) { @@ -646,16 +590,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (d->plr) d->plr->d = NULL; - kfree(d->ctrl_val); - kfree(d->mbps_val); + kfree(hw_dom->ctrl_val); + kfree(hw_dom->mbps_val); bitmap_free(d->rmid_busy_llc); kfree(d->mbm_total); kfree(d->mbm_local); - kfree(d); + kfree(hw_dom); return; } - if (r == &rdt_resources_all[RDT_RESOURCE_L3]) { + if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) { if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0); @@ -732,13 +676,8 @@ static int resctrl_offline_cpu(unsigned int cpu) static __init void rdt_init_padding(void) { struct rdt_resource *r; - int cl; for_each_alloc_capable_rdt_resource(r) { - cl = strlen(r->name); - if (cl > max_name_width) - max_name_width = cl; - if (r->data_width > max_data_width) max_data_width = r->data_width; } @@ -827,19 +766,22 @@ static bool __init rdt_cpu_has(int flag) static __init bool get_mem_config(void) { + struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; + if (!rdt_cpu_has(X86_FEATURE_MBA)) return false; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]); + return __get_mem_config_intel(&hw_res->r_resctrl); else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]); + return __rdt_get_mem_config_amd(&hw_res->r_resctrl); return false; } static __init bool get_rdt_alloc_resources(void) { + struct rdt_resource *r; bool ret = false; if (rdt_alloc_capable) @@ -849,14 +791,16 @@ static __init bool get_rdt_alloc_resources(void) return false; if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { - rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + rdt_get_cache_alloc_cfg(1, r); if (rdt_cpu_has(X86_FEATURE_CDP_L3)) rdt_get_cdp_l3_config(); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are same format at 
0x10.1 */ - rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); + r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl; + rdt_get_cache_alloc_cfg(2, r); if (rdt_cpu_has(X86_FEATURE_CDP_L2)) rdt_get_cdp_l2_config(); ret = true; @@ -870,6 +814,8 @@ static __init bool get_rdt_alloc_resources(void) static __init bool get_rdt_mon_resources(void) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) @@ -880,7 +826,7 @@ static __init bool get_rdt_mon_resources(void) if (!rdt_mon_features) return false; - return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]); + return !rdt_get_mon_l3_config(r); } static __init void __check_quirks_intel(void) @@ -918,42 +864,40 @@ static __init bool get_rdt_resources(void) static __init void rdt_init_res_defs_intel(void) { + struct rdt_hw_resource *hw_res; struct rdt_resource *r; for_each_rdt_resource(r) { + hw_res = resctrl_to_arch_res(r); + if (r->rid == RDT_RESOURCE_L3 || - r->rid == RDT_RESOURCE_L3DATA || - r->rid == RDT_RESOURCE_L3CODE || - r->rid == RDT_RESOURCE_L2 || - r->rid == RDT_RESOURCE_L2DATA || - r->rid == RDT_RESOURCE_L2CODE) { + r->rid == RDT_RESOURCE_L2) { r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; r->cache.arch_has_per_cpu_cfg = false; } else if (r->rid == RDT_RESOURCE_MBA) { - r->msr_base = MSR_IA32_MBA_THRTL_BASE; - r->msr_update = mba_wrmsr_intel; + hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE; + hw_res->msr_update = mba_wrmsr_intel; } } } static __init void rdt_init_res_defs_amd(void) { + struct rdt_hw_resource *hw_res; struct rdt_resource *r; for_each_rdt_resource(r) { + hw_res = resctrl_to_arch_res(r); + if (r->rid == RDT_RESOURCE_L3 || - r->rid == RDT_RESOURCE_L3DATA || - r->rid == RDT_RESOURCE_L3CODE || - r->rid == RDT_RESOURCE_L2 || - r->rid == RDT_RESOURCE_L2DATA || - r->rid == RDT_RESOURCE_L2CODE) { + r->rid == RDT_RESOURCE_L2) { r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; r->cache.arch_has_per_cpu_cfg = true; } else if (r->rid == RDT_RESOURCE_MBA) { - r->msr_base = MSR_IA32_MBA_BW_BASE; - r->msr_update = mba_wrmsr_amd; + hw_res->msr_base = MSR_IA32_MBA_BW_BASE; + hw_res->msr_update = mba_wrmsr_amd; } } } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index c877642e8a14..87666275eed9 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -57,20 +57,23 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, +int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; unsigned long bw_val; - if (d->have_new_ctrl) { + cfg = &d->staged_config[s->conf_type]; + if (cfg->have_new_ctrl) { rdt_last_cmd_printf("Duplicate domain %d\n", d->id); return -EINVAL; } if (!bw_validate(data->buf, &bw_val, r)) return -EINVAL; - d->new_ctrl = bw_val; - d->have_new_ctrl = true; + cfg->new_ctrl = bw_val; + cfg->have_new_ctrl = true; return 0; } @@ -125,13 +128,16 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * Read one cache bit mask (hex). Check that it is valid for the current * resource type. 
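/*
 * The parse_bw() conversion above (and parse_cbm() just below) no
 * longer write d->new_ctrl directly; they stage one value per CDP
 * type, to be applied later by resctrl_arch_update_domains(). A
 * reduced sketch of that staging step, with the patch's types trimmed
 * to the fields used here:
 */
#include <linux/types.h>
#include <linux/errno.h>

enum resctrl_conf_type { CDP_NONE, CDP_CODE, CDP_DATA, CDP_NUM_TYPES };

struct resctrl_staged_config {
        u32     new_ctrl;
        bool    have_new_ctrl;
};

static int stage_config(struct resctrl_staged_config *cfg, u32 val)
{
        if (cfg->have_new_ctrl)
                return -EINVAL;         /* duplicate domain in schema line */

        cfg->new_ctrl = val;
        cfg->have_new_ctrl = true;
        return 0;
}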
*/ -int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, +int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; u32 cbm_val; - if (d->have_new_ctrl) { + cfg = &d->staged_config[s->conf_type]; + if (cfg->have_new_ctrl) { rdt_last_cmd_printf("Duplicate domain %d\n", d->id); return -EINVAL; } @@ -160,12 +166,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, * The CBM may not overlap with the CBM of another closid if * either is exclusive. */ - if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { rdt_last_cmd_puts("Overlaps with exclusive group\n"); return -EINVAL; } - if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { rdt_last_cmd_puts("Overlaps with other group\n"); @@ -173,8 +179,8 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, } } - d->new_ctrl = cbm_val; - d->have_new_ctrl = true; + cfg->new_ctrl = cbm_val; + cfg->have_new_ctrl = true; return 0; } @@ -185,9 +191,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, * separated by ";". The "id" is in decimal, and must match one of * the "id"s for this resource. */ -static int parse_line(char *line, struct rdt_resource *r, +static int parse_line(char *line, struct resctrl_schema *s, struct rdtgroup *rdtgrp) { + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; struct rdt_parse_data data; char *dom = NULL, *id; struct rdt_domain *d; @@ -213,9 +222,10 @@ next: if (d->id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; - if (r->parse_ctrlval(&data, r, d)) + if (r->parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + cfg = &d->staged_config[t]; /* * In pseudo-locking setup mode and just * parsed a valid CBM that should be @@ -224,9 +234,9 @@ next: * the required initialization for single * region and return. */ - rdtgrp->plr->r = r; + rdtgrp->plr->s = s; rdtgrp->plr->d = d; - rdtgrp->plr->cbm = d->new_ctrl; + rdtgrp->plr->cbm = cfg->new_ctrl; d->plr = rdtgrp->plr; return 0; } @@ -236,28 +246,72 @@ next: return -EINVAL; } -int update_domains(struct rdt_resource *r, int closid) +static u32 get_config_index(u32 closid, enum resctrl_conf_type type) { + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + +static bool apply_config(struct rdt_hw_domain *hw_dom, + struct resctrl_staged_config *cfg, u32 idx, + cpumask_var_t cpu_mask, bool mba_sc) +{ + struct rdt_domain *dom = &hw_dom->d_resctrl; + u32 *dc = !mba_sc ? 
hw_dom->ctrl_val : hw_dom->mbps_val; + + if (cfg->new_ctrl != dc[idx]) { + cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); + dc[idx] = cfg->new_ctrl; + + return true; + } + + return false; +} + +int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) +{ + struct resctrl_staged_config *cfg; + struct rdt_hw_domain *hw_dom; struct msr_param msr_param; + enum resctrl_conf_type t; cpumask_var_t cpu_mask; struct rdt_domain *d; bool mba_sc; - u32 *dc; int cpu; + u32 idx; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - msr_param.low = closid; - msr_param.high = msr_param.low + 1; - msr_param.res = r; - mba_sc = is_mba_sc(r); + msr_param.res = NULL; list_for_each_entry(d, &r->domains, list) { - dc = !mba_sc ? d->ctrl_val : d->mbps_val; - if (d->have_new_ctrl && d->new_ctrl != dc[closid]) { - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - dc[closid] = d->new_ctrl; + hw_dom = resctrl_to_arch_dom(d); + for (t = 0; t < CDP_NUM_TYPES; t++) { + cfg = &hw_dom->d_resctrl.staged_config[t]; + if (!cfg->have_new_ctrl) + continue; + + idx = get_config_index(closid, t); + if (!apply_config(hw_dom, cfg, idx, cpu_mask, mba_sc)) + continue; + + if (!msr_param.res) { + msr_param.low = idx; + msr_param.high = msr_param.low + 1; + msr_param.res = r; + } else { + msr_param.low = min(msr_param.low, idx); + msr_param.high = max(msr_param.high, idx + 1); + } } } @@ -284,11 +338,11 @@ done: static int rdtgroup_parse_resource(char *resname, char *tok, struct rdtgroup *rdtgrp) { - struct rdt_resource *r; + struct resctrl_schema *s; - for_each_alloc_enabled_rdt_resource(r) { - if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid) - return parse_line(tok, r, rdtgrp); + list_for_each_entry(s, &resctrl_schema_all, list) { + if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid) + return parse_line(tok, s, rdtgrp); } rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); return -EINVAL; @@ -297,6 +351,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { + struct resctrl_schema *s; struct rdtgroup *rdtgrp; struct rdt_domain *dom; struct rdt_resource *r; @@ -327,9 +382,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { - list_for_each_entry(dom, &r->domains, list) - dom->have_new_ctrl = false; + list_for_each_entry(s, &resctrl_schema_all, list) { + list_for_each_entry(dom, &s->res->domains, list) + memset(dom->staged_config, 0, sizeof(dom->staged_config)); } while ((tok = strsep(&buf, "\n")) != NULL) { @@ -349,8 +404,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { - ret = update_domains(r, rdtgrp->closid); + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; + ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret) goto out; } @@ -371,19 +427,31 @@ out: return ret ?: nbytes; } -static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type type) +{ + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + u32 idx = get_config_index(closid, type); + + if (!is_mba_sc(r)) + return hw_dom->ctrl_val[idx]; + return hw_dom->mbps_val[idx]; +} + +static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) { + struct 
rdt_resource *r = schema->res; struct rdt_domain *dom; bool sep = false; u32 ctrl_val; - seq_printf(s, "%*s:", max_name_width, r->name); + seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(dom, &r->domains, list) { if (sep) seq_puts(s, ";"); - ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] : - dom->mbps_val[closid]); + ctrl_val = resctrl_arch_get_config(r, dom, closid, + schema->conf_type); seq_printf(s, r->format_str, dom->id, max_data_width, ctrl_val); sep = true; @@ -394,16 +462,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { + struct resctrl_schema *schema; struct rdtgroup *rdtgrp; - struct rdt_resource *r; int ret = 0; u32 closid; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (rdtgrp) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - for_each_alloc_enabled_rdt_resource(r) - seq_printf(s, "%s:uninitialized\n", r->name); + list_for_each_entry(schema, &resctrl_schema_all, list) { + seq_printf(s, "%s:uninitialized\n", schema->name); + } } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { if (!rdtgrp->plr->d) { rdt_last_cmd_clear(); @@ -411,15 +480,15 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, ret = -ENODEV; } else { seq_printf(s, "%s:%d=%x\n", - rdtgrp->plr->r->name, + rdtgrp->plr->s->res->name, rdtgrp->plr->d->id, rdtgrp->plr->cbm); } } else { closid = rdtgrp->closid; - for_each_alloc_enabled_rdt_resource(r) { - if (closid < r->num_closid) - show_doms(s, r, closid); + list_for_each_entry(schema, &resctrl_schema_all, list) { + if (closid < schema->num_closid) + show_doms(s, schema, closid); } } } else { @@ -449,6 +518,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; + struct rdt_hw_resource *hw_res; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; @@ -468,7 +538,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) domid = md.u.domid; evtid = md.u.evtid; - r = &rdt_resources_all[resid]; + hw_res = &rdt_resources_all[resid]; + r = &hw_res->r_resctrl; d = rdt_find_domain(r, domid, NULL); if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; @@ -482,7 +553,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) else if (rr.val & RMID_VAL_UNAVAIL) seq_puts(m, "Unavailable\n"); else - seq_printf(m, "%llu\n", rr.val * r->mon_scale); + seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale); out: rdtgroup_kn_unlock(of->kn); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 6a5f60a37219..1d647188a43b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_RESCTRL_INTERNAL_H #define _ASM_X86_RESCTRL_INTERNAL_H +#include <linux/resctrl.h> #include <linux/sched.h> #include <linux/kernfs.h> #include <linux/fs_context.h> @@ -109,6 +110,7 @@ extern unsigned int resctrl_cqm_threshold; extern bool rdt_alloc_capable; extern bool rdt_mon_capable; extern unsigned int rdt_mon_features; +extern struct list_head resctrl_schema_all; enum rdt_group_type { RDTCTRL_GROUP = 0, @@ -161,8 +163,8 @@ struct mongroup { /** * struct pseudo_lock_region - pseudo-lock region information - * @r: RDT resource to which this pseudo-locked region - * belongs + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs * @d: RDT domain to which this pseudo-locked region * belongs 
* @cbm: bitmask of the pseudo-locked region @@ -182,7 +184,7 @@ struct mongroup { * @pm_reqs: Power management QoS requests related to this region */ struct pseudo_lock_region { - struct rdt_resource *r; + struct resctrl_schema *s; struct rdt_domain *d; u32 cbm; wait_queue_head_t lock_thread_wq; @@ -303,44 +305,25 @@ struct mbm_state { }; /** - * struct rdt_domain - group of cpus sharing an RDT resource - * @list: all instances of this resource - * @id: unique id for this instance - * @cpu_mask: which cpus share this resource - * @rmid_busy_llc: - * bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth - * @mbm_over: worker to periodically read MBM h/w counters - * @cqm_limbo: worker to periodically read CQM h/w counters - * @mbm_work_cpu: - * worker cpu for MBM h/w counters - * @cqm_work_cpu: - * worker cpu for CQM h/w counters + * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share + * a resource + * @d_resctrl: Properties exposed to the resctrl file system * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps - * @new_ctrl: new ctrl value to be loaded - * @have_new_ctrl: did user provide new_ctrl for this domain - * @plr: pseudo-locked region (if any) associated with domain + * + * Members of this structure are accessed via helpers that provide abstraction. */ -struct rdt_domain { - struct list_head list; - int id; - struct cpumask cpu_mask; - unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; - struct delayed_work mbm_over; - struct delayed_work cqm_limbo; - int mbm_work_cpu; - int cqm_work_cpu; +struct rdt_hw_domain { + struct rdt_domain d_resctrl; u32 *ctrl_val; u32 *mbps_val; - u32 new_ctrl; - bool have_new_ctrl; - struct pseudo_lock_region *plr; }; +static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) +{ + return container_of(r, struct rdt_hw_domain, d_resctrl); +} + /** * struct msr_param - set a range of MSRs from a domain * @res: The resource to use @@ -349,69 +332,8 @@ struct rdt_domain { */ struct msr_param { struct rdt_resource *res; - int low; - int high; -}; - -/** - * struct rdt_cache - Cache allocation related data - * @cbm_len: Length of the cache bit mask - * @min_cbm_bits: Minimum number of consecutive bits to be set - * @cbm_idx_mult: Multiplier of CBM index - * @cbm_idx_offset: Offset of CBM index. CBM index is computed by: - * closid * cbm_idx_multi + cbm_idx_offset - * in a cache bit mask - * @shareable_bits: Bitmask of shareable resource with other - * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. - * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. - * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache - * level has CPU scope. 
- */ -struct rdt_cache { - unsigned int cbm_len; - unsigned int min_cbm_bits; - unsigned int cbm_idx_mult; - unsigned int cbm_idx_offset; - unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; - bool arch_has_empty_bitmaps; - bool arch_has_per_cpu_cfg; -}; - -/** - * enum membw_throttle_mode - System's memory bandwidth throttling mode - * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system - * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core - * always using smallest bandwidth percentage - * assigned to threads, aka "max throttling" - * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread - */ -enum membw_throttle_mode { - THREAD_THROTTLE_UNDEFINED = 0, - THREAD_THROTTLE_MAX, - THREAD_THROTTLE_PER_THREAD, -}; - -/** - * struct rdt_membw - Memory bandwidth allocation related data - * @min_bw: Minimum memory bandwidth percentage user can request - * @bw_gran: Granularity at which the memory bandwidth is allocated - * @delay_linear: True if memory B/W delay is in linear scale - * @arch_needs_linear: True if we can't configure non-linear resources - * @throttle_mode: Bandwidth throttling mode when threads request - * different memory bandwidths - * @mba_sc: True if MBA software controller(mba_sc) is enabled - * @mb_map: Mapping of memory B/W percentage to memory B/W delay - */ -struct rdt_membw { - u32 min_bw; - u32 bw_gran; - u32 delay_linear; - bool arch_needs_linear; - enum membw_throttle_mode throttle_mode; - bool mba_sc; - u32 *mb_map; + u32 low; + u32 high; }; static inline bool is_llc_occupancy_enabled(void) @@ -446,111 +368,103 @@ struct rdt_parse_data { }; /** - * struct rdt_resource - attributes of an RDT resource - * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature - * @alloc_capable: Is allocation available on this machine - * @mon_capable: Is monitor feature available on this machine - * @name: Name to use in "schemata" file - * @num_closid: Number of CLOSIDs available - * @cache_level: Which cache level defines scope of this resource - * @default_ctrl: Specifies default cache cbm or memory B/W percent. + * struct rdt_hw_resource - arch private attributes of a resctrl resource + * @r_resctrl: Attributes of the resource used directly by resctrl. + * @num_closid: Maximum number of closid this hardware can support, + * regardless of CDP. This is exposed via + * resctrl_arch_get_num_closid() to avoid confusion + * with struct resctrl_schema's property of the same name, + * which has been corrected for features like CDP. * @msr_base: Base MSR address for CBMs * @msr_update: Function pointer to update QOS MSRs - * @data_width: Character width of data when displaying - * @domains: All domains for this resource - * @cache: Cache allocation related data - * @membw: If the component has bandwidth controls, their properties. - * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values - * @evt_list: List of monitoring events - * @num_rmid: Number of RMIDs available * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. - * @fflags: flags to choose base and info files + * @cdp_enabled: CDP state of this resource + * + * Members of this structure are either private to the architecture + * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. + * msr_update and msr_base. 
*/ -struct rdt_resource { - int rid; - bool alloc_enabled; - bool mon_enabled; - bool alloc_capable; - bool mon_capable; - char *name; - int num_closid; - int cache_level; - u32 default_ctrl; +struct rdt_hw_resource { + struct rdt_resource r_resctrl; + u32 num_closid; unsigned int msr_base; void (*msr_update) (struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); - int data_width; - struct list_head domains; - struct rdt_cache cache; - struct rdt_membw membw; - const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct rdt_resource *r, - struct rdt_domain *d); - struct list_head evt_list; - int num_rmid; unsigned int mon_scale; unsigned int mbm_width; - unsigned long fflags; + bool cdp_enabled; }; -int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, +static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) +{ + return container_of(r, struct rdt_hw_resource, r_resctrl); +} + +int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d); -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, +int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d); extern struct mutex rdtgroup_mutex; -extern struct rdt_resource rdt_resources_all[]; +extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); extern struct dentry *debugfs_resctrl; -enum { +enum resctrl_res_level { RDT_RESOURCE_L3, - RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, - RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE, RDT_RESOURCE_MBA, /* Must be the last */ RDT_NUM_RESOURCES, }; +static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); + + hw_res++; + return &hw_res->r_resctrl; +} + +static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) +{ + return rdt_resources_all[l].cdp_enabled; +} + +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); + +/* + * To return the common struct rdt_resource, which is contained in struct + * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. 
+ */ #define for_each_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) + for (r = &rdt_resources_all[0].r_resctrl; \ + r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \ + r = resctrl_inc(r)) #define for_each_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->alloc_capable || r->mon_capable) #define for_each_alloc_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->alloc_capable) #define for_each_mon_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->mon_capable) #define for_each_alloc_enabled_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->alloc_enabled) #define for_each_mon_enabled_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->mon_enabled) /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ @@ -594,7 +508,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, unsigned long cbm); @@ -609,7 +523,6 @@ void rdt_pseudo_lock_release(void); int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); -int update_domains(struct rdt_resource *r, int closid); int closids_supported(void); void closid_free(int closid); int alloc_rmid(void); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 8caf871b796f..c9f0f3d63f75 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -174,7 +174,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free) struct rdt_resource *r; u32 crmid = 1, nrmid; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; /* * Skip RMID 0 and start from RMID 1 and check all the RMIDs that @@ -232,7 +232,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) int cpu; u64 val; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; entry->busy = 0; cpu = get_cpu(); @@ -287,6 +287,7 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m; u64 chunks, tval; @@ -318,7 +319,7 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) return 0; } - chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width); + chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width); m->chunks += chunks; m->prev_msr = tval; @@ -333,7 +334,7 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) */ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) { - struct rdt_resource *r = 
&rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m = &rr->d->mbm_local[rmid]; u64 tval, cur_bw, chunks; @@ -341,8 +342,8 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) return; - chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); - cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20; + chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width); + cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20; if (m->delta_comp) m->delta_bw = abs(cur_bw - m->prev_bw); @@ -421,6 +422,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; + struct rdt_hw_resource *hw_r_mba; + struct rdt_hw_domain *hw_dom_mba; u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; @@ -430,7 +433,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) if (!is_mbm_local_enabled()) return; - r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; + hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; + r_mba = &hw_r_mba->r_resctrl; closid = rgrp->closid; rmid = rgrp->mon.rmid; pmbm_data = &dom_mbm->mbm_local[rmid]; @@ -440,11 +444,16 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) pr_warn_once("Failure to get domain for MBA update\n"); return; } + hw_dom_mba = resctrl_to_arch_dom(dom_mba); cur_bw = pmbm_data->prev_bw; - user_bw = dom_mba->mbps_val[closid]; + user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); delta_bw = pmbm_data->delta_bw; - cur_msr_val = dom_mba->ctrl_val[closid]; + /* + * resctrl_arch_get_config() chooses the mbps/ctrl value to return + * based on is_mba_sc(). For now, reach into the hw_dom. + */ + cur_msr_val = hw_dom_mba->ctrl_val[closid]; /* * For Ctrl groups read data from child monitor groups. 
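The hunks above rely on a domain-side counterpart of resctrl_to_arch_res() that is not visible in this diff. A minimal sketch, assuming struct rdt_hw_domain embeds its generic part as d_resctrl (the member name is an assumption, chosen to mirror r_resctrl):

static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
{
	/* same container_of() pattern as resctrl_to_arch_res() */
	return container_of(r, struct rdt_hw_domain, d_resctrl);
}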
@@ -479,9 +488,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) return; } - cur_msr = r_mba->msr_base + closid; + cur_msr = hw_r_mba->msr_base + closid; wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba)); - dom_mba->ctrl_val[closid] = new_msr_val; + hw_dom_mba->ctrl_val[closid] = new_msr_val; /* * Delta values are updated dynamically package wise for each @@ -543,7 +552,7 @@ void cqm_handle_limbo(struct work_struct *work) mutex_lock(&rdtgroup_mutex); - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; d = container_of(work, struct rdt_domain, cqm_limbo.work); __check_limbo(d, false); @@ -579,7 +588,7 @@ void mbm_handle_overflow(struct work_struct *work) if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; d = container_of(work, struct rdt_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { @@ -676,15 +685,16 @@ static void l3_mon_evt_init(struct rdt_resource *r) int rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int cl_size = boot_cpu_data.x86_cache_size; int ret; - r->mon_scale = boot_cpu_data.x86_cache_occ_scale; + hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; - r->mbm_width = MBM_CNTR_WIDTH_BASE; + hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) - r->mbm_width += mbm_offset; + hw_res->mbm_width += mbm_offset; else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) pr_warn("Ignoring impossible MBM counter offset\n"); @@ -698,7 +708,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid; /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ - resctrl_cqm_threshold /= r->mon_scale; + resctrl_cqm_threshold /= hw_res->mon_scale; ret = dom_data_init(r); if (ret) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 2207916cae65..db813f819ad6 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -250,7 +250,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) plr->line_size = 0; kfree(plr->kmem); plr->kmem = NULL; - plr->r = NULL; + plr->s = NULL; if (plr->d) plr->d->plr = NULL; plr->d = NULL; @@ -294,10 +294,10 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) ci = get_cpu_cacheinfo(plr->cpu); - plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm); + plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->r->cache_level) { + if (ci->info_list[i].level == plr->s->res->cache_level) { plr->line_size = ci->info_list[i].coherency_line_size; return 0; } @@ -688,8 +688,8 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * resource, the portion of cache used by it should be made * unavailable to all future allocations from both resources. 
*/ - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled || - rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) { + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) || + resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) { rdt_last_cmd_puts("CDP enabled\n"); return -EINVAL; } @@ -800,7 +800,7 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm unsigned long cbm_b; if (d->plr) { - cbm_len = d->plr->r->cache.cbm_len; + cbm_len = d->plr->s->res->cache.cbm_len; cbm_b = d->plr->cbm; if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 01fd30e7829d..b57b3db9a6a7 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -39,6 +39,9 @@ static struct kernfs_root *rdt_root; struct rdtgroup rdtgroup_default; LIST_HEAD(rdt_all_groups); +/* list of entries for the schemata file */ +LIST_HEAD(resctrl_schema_all); + /* Kernel fs node for "info" directory under root */ static struct kernfs_node *kn_info; @@ -100,12 +103,12 @@ int closids_supported(void) static void closid_init(void) { - struct rdt_resource *r; - int rdt_min_closid = 32; + struct resctrl_schema *s; + u32 rdt_min_closid = 32; /* Compute rdt_min_closid across all resources */ - for_each_alloc_enabled_rdt_resource(r) - rdt_min_closid = min(rdt_min_closid, r->num_closid); + list_for_each_entry(s, &resctrl_schema_all, list) + rdt_min_closid = min(rdt_min_closid, s->num_closid); closid_free_map = BIT_MASK(rdt_min_closid) - 1; @@ -842,16 +845,17 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; - seq_printf(seq, "%d\n", r->num_closid); + seq_printf(seq, "%u\n", s->num_closid); return 0; } static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->default_ctrl); return 0; @@ -860,7 +864,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); return 0; @@ -869,7 +874,8 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); return 0; @@ -892,38 +898,40 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. 
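 *
 * (test_bit() and the other bitmap helpers dereference whole
 * unsigned long words, so on a 64-bit kernel something like
 *
 *	u32 val = ctrl_val;
 *	test_bit(0, (unsigned long *)&val);
 *
 * would read past the end of the u32; keeping these locals
 * unsigned long avoids that.)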
*/ unsigned long sw_shareable = 0, hw_shareable = 0; unsigned long exclusive = 0, pseudo_locked = 0; + struct rdt_resource *r = s->res; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; bool sep = false; - u32 *ctrl; + u32 ctrl_val; mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; list_for_each_entry(dom, &r->domains, list) { if (sep) seq_putc(seq, ';'); - ctrl = dom->ctrl_val; sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->id); - for (i = 0; i < closids_supported(); i++, ctrl++) { + for (i = 0; i < closids_supported(); i++) { if (!closid_allocated(i)) continue; + ctrl_val = resctrl_arch_get_config(r, dom, i, + s->conf_type); mode = rdtgroup_mode_by_closid(i); switch (mode) { case RDT_MODE_SHAREABLE: - sw_shareable |= *ctrl; + sw_shareable |= ctrl_val; break; case RDT_MODE_EXCLUSIVE: - exclusive |= *ctrl; + exclusive |= ctrl_val; break; case RDT_MODE_PSEUDO_LOCKSETUP: /* @@ -970,7 +978,8 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); return 0; @@ -1001,7 +1010,8 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); return 0; @@ -1010,7 +1020,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); return 0; @@ -1020,8 +1031,9 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); + seq_printf(seq, "%u\n", resctrl_cqm_threshold * hw_res->mon_scale); return 0; } @@ -1029,7 +1041,8 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) seq_puts(seq, "per-thread\n"); @@ -1042,7 +1055,7 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_hw_resource *hw_res; unsigned int bytes; int ret; @@ -1053,7 +1066,8 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (bytes > (boot_cpu_data.x86_cache_size * 1024)) return -EINVAL; - resctrl_cqm_threshold = bytes / r->mon_scale; + hw_res = resctrl_to_arch_res(of->kn->parent->priv); + resctrl_cqm_threshold = bytes / hw_res->mon_scale; return nbytes; } @@ -1078,76 +1092,17 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, return 0; } -/** - * 
rdt_cdp_peer_get - Retrieve CDP peer if it exists - * @r: RDT resource to which RDT domain @d belongs - * @d: Cache instance for which a CDP peer is requested - * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer) - * Used to return the result. - * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer) - * Used to return the result. - * - * RDT resources are managed independently and by extension the RDT domains - * (RDT resource instances) are managed independently also. The Code and - * Data Prioritization (CDP) RDT resources, while managed independently, - * could refer to the same underlying hardware. For example, - * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache. - * - * When provided with an RDT resource @r and an instance of that RDT - * resource @d rdt_cdp_peer_get() will return if there is a peer RDT - * resource and the exact instance that shares the same hardware. - * - * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists. - * If a CDP peer was found, @r_cdp will point to the peer RDT resource - * and @d_cdp will point to the peer RDT domain. - */ -static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, - struct rdt_resource **r_cdp, - struct rdt_domain **d_cdp) +static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) { - struct rdt_resource *_r_cdp = NULL; - struct rdt_domain *_d_cdp = NULL; - int ret = 0; - - switch (r->rid) { - case RDT_RESOURCE_L3DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - break; - case RDT_RESOURCE_L3CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - break; - case RDT_RESOURCE_L2DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE]; - break; - case RDT_RESOURCE_L2CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA]; - break; + switch (my_type) { + case CDP_CODE: + return CDP_DATA; + case CDP_DATA: + return CDP_CODE; default: - ret = -ENOENT; - goto out; - } - - /* - * When a new CPU comes online and CDP is enabled then the new - * RDT domains (if any) associated with both CDP RDT resources - * are added in the same CPU online routine while the - * rdtgroup_mutex is held. It should thus not happen for one - * RDT domain to exist and be associated with its RDT CDP - * resource but there is no RDT domain associated with the - * peer RDT CDP resource. Hence the WARN. - */ - _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); - if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { - _r_cdp = NULL; - _d_cdp = NULL; - ret = -EINVAL; + case CDP_NONE: + return CDP_NONE; } - -out: - *r_cdp = _r_cdp; - *d_cdp = _d_cdp; - - return ret; } /** @@ -1171,11 +1126,11 @@ out: * Return: false if CBM does not overlap, true if it does. 
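 *
 * Worked example (hypothetical 8-bit masks): if closid 1 holds
 * ctrl_b = 0x0f in exclusive mode, testing cbm = 0x3c for a different
 * closid reports an overlap, since 0x3c & 0x0f is non-zero.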
*/ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive) + unsigned long cbm, int closid, + enum resctrl_conf_type type, bool exclusive) { enum rdtgrp_mode mode; unsigned long ctrl_b; - u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ @@ -1186,9 +1141,8 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d } /* Check for overlap with other resource groups */ - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { - ctrl_b = *ctrl; + for (i = 0; i < closids_supported(); i++) { + ctrl_b = resctrl_arch_get_config(r, d, i, type); mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { @@ -1208,7 +1162,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d /** * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware - * @r: Resource to which domain instance @d belongs. + * @s: Schema for the resource to which domain instance @d belongs. * @d: The domain instance for which @closid is being tested. * @cbm: Capacity bitmask being tested. * @closid: Intended closid for @cbm. @@ -1226,19 +1180,19 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d * * Return: true if CBM overlap detected, false if there is no overlap */ -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, unsigned long cbm, int closid, bool exclusive) { - struct rdt_resource *r_cdp; - struct rdt_domain *d_cdp; + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + struct rdt_resource *r = s->res; - if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive)) + if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type, + exclusive)) return true; - if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0) + if (!resctrl_arch_get_cdp_enabled(r->rid)) return false; - - return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive); + return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive); } /** @@ -1256,17 +1210,21 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; + struct resctrl_schema *s; struct rdt_resource *r; bool has_cache = false; struct rdt_domain *d; + u32 ctrl; - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; if (r->rid == RDT_RESOURCE_MBA) continue; has_cache = true; list_for_each_entry(d, &r->domains, list) { - if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) { + ctrl = resctrl_arch_get_config(r, d, closid, + s->conf_type); + if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { rdt_last_cmd_puts("Schemata overlaps\n"); return false; } @@ -1397,6 +1355,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, static int rdtgroup_size_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { + struct resctrl_schema *schema; struct rdtgroup *rdtgrp; struct rdt_resource *r; struct rdt_domain *d; @@ -1418,8 +1377,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, ret = -ENODEV; } else { seq_printf(s, "%*s:", max_name_width, - rdtgrp->plr->r->name); - size = rdtgroup_cbm_to_size(rdtgrp->plr->r, + rdtgrp->plr->s->name); + size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, 
rdtgrp->plr->d, rdtgrp->plr->cbm); seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); @@ -1427,18 +1386,19 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(schema, &resctrl_schema_all, list) { + r = schema->res; sep = false; - seq_printf(s, "%*s:", max_name_width, r->name); + seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(d, &r->domains, list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - ctrl = (!is_mba_sc(r) ? - d->ctrl_val[rdtgrp->closid] : - d->mbps_val[rdtgrp->closid]); + ctrl = resctrl_arch_get_config(r, d, + rdtgrp->closid, + schema->conf_type); if (r->rid == RDT_RESOURCE_MBA) size = ctrl; else @@ -1757,14 +1717,14 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, return ret; } -static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, +static int rdtgroup_mkdir_info_resdir(void *priv, char *name, unsigned long fflags) { struct kernfs_node *kn_subdir; int ret; kn_subdir = kernfs_create_dir(kn_info, name, - kn_info->mode, r); + kn_info->mode, priv); if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); @@ -1781,6 +1741,7 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { + struct resctrl_schema *s; struct rdt_resource *r; unsigned long fflags; char name[32]; @@ -1795,9 +1756,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) if (ret) goto out_destroy; - for_each_alloc_enabled_rdt_resource(r) { + /* loop over enabled controls, these are all alloc_enabled */ + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; fflags = r->fflags | RF_CTRL_INFO; - ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); + ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } @@ -1867,7 +1830,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -1888,7 +1851,7 @@ static int set_cache_qos_cfg(int level, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - r_l = &rdt_resources_all[level]; + r_l = &rdt_resources_all[level].r_resctrl; list_for_each_entry(d, &r_l->domains, list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ @@ -1914,14 +1877,16 @@ static int set_cache_qos_cfg(int level, bool enable) /* Restore the qos cfg state when a domain comes online */ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) { - if (!r->alloc_capable) + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (!r->cdp_capable) return; - if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) - l2_qos_cfg_update(&r->alloc_enabled); + if (r->rid == RDT_RESOURCE_L2) + l2_qos_cfg_update(&hw_res->cdp_enabled); - if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) - l3_qos_cfg_update(&r->alloc_enabled); + if (r->rid == RDT_RESOURCE_L3) + l3_qos_cfg_update(&hw_res->cdp_enabled); } /* @@ -1932,7 +1897,8 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) */ static int set_mba_sc(bool mba_sc) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_hw_domain *hw_dom; struct 
rdt_domain *d; if (!is_mbm_enabled() || !is_mba_linear() || @@ -1940,73 +1906,60 @@ static int set_mba_sc(bool mba_sc) return -EINVAL; r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) - setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); + list_for_each_entry(d, &r->domains, list) { + hw_dom = resctrl_to_arch_dom(d); + setup_default_ctrlval(r, hw_dom->ctrl_val, hw_dom->mbps_val); + } return 0; } -static int cdp_enable(int level, int data_type, int code_type) +static int cdp_enable(int level) { - struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; - struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; - struct rdt_resource *r_l = &rdt_resources_all[level]; + struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; int ret; - if (!r_l->alloc_capable || !r_ldata->alloc_capable || - !r_lcode->alloc_capable) + if (!r_l->alloc_capable) return -EINVAL; ret = set_cache_qos_cfg(level, true); - if (!ret) { - r_l->alloc_enabled = false; - r_ldata->alloc_enabled = true; - r_lcode->alloc_enabled = true; - } + if (!ret) + rdt_resources_all[level].cdp_enabled = true; + return ret; } -static int cdpl3_enable(void) +static void cdp_disable(int level) { - return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE); -} + struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; -static int cdpl2_enable(void) -{ - return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE); + if (r_hw->cdp_enabled) { + set_cache_qos_cfg(level, false); + r_hw->cdp_enabled = false; + } } -static void cdp_disable(int level, int data_type, int code_type) +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) { - struct rdt_resource *r = &rdt_resources_all[level]; + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; - r->alloc_enabled = r->alloc_capable; + if (!hw_res->r_resctrl.cdp_capable) + return -EINVAL; - if (rdt_resources_all[data_type].alloc_enabled) { - rdt_resources_all[data_type].alloc_enabled = false; - rdt_resources_all[code_type].alloc_enabled = false; - set_cache_qos_cfg(level, false); - } -} + if (enable) + return cdp_enable(l); -static void cdpl3_disable(void) -{ - cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); -} + cdp_disable(l); -static void cdpl2_disable(void) -{ - cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); + return 0; } static void cdp_disable_all(void) { - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) - cdpl3_disable(); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) - cdpl2_disable(); + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); } /* @@ -2084,10 +2037,10 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) int ret = 0; if (ctx->enable_cdpl2) - ret = cdpl2_enable(); + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); if (!ret && ctx->enable_cdpl3) - ret = cdpl3_enable(); + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); if (!ret && ctx->enable_mba_mbps) ret = set_mba_sc(true); @@ -2095,6 +2048,92 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) return ret; } +static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) +{ + struct resctrl_schema *s; + const char *suffix = ""; + int ret, cl; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->res = r; + s->num_closid = 
resctrl_arch_get_num_closid(r); + if (resctrl_arch_get_cdp_enabled(r->rid)) + s->num_closid /= 2; + + s->conf_type = type; + switch (type) { + case CDP_CODE: + suffix = "CODE"; + break; + case CDP_DATA: + suffix = "DATA"; + break; + case CDP_NONE: + suffix = ""; + break; + } + + ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); + if (ret >= sizeof(s->name)) { + kfree(s); + return -EINVAL; + } + + cl = strlen(s->name); + + /* + * If CDP is supported by this resource, but not enabled, + * include the suffix. This ensures the tabular format of the + * schemata file does not change between mounts of the filesystem. + */ + if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) + cl += 4; + + if (cl > max_name_width) + max_name_width = cl; + + INIT_LIST_HEAD(&s->list); + list_add(&s->list, &resctrl_schema_all); + + return 0; +} + +static int schemata_list_create(void) +{ + struct rdt_resource *r; + int ret = 0; + + for_each_alloc_enabled_rdt_resource(r) { + if (resctrl_arch_get_cdp_enabled(r->rid)) { + ret = schemata_list_add(r, CDP_CODE); + if (ret) + break; + + ret = schemata_list_add(r, CDP_DATA); + } else { + ret = schemata_list_add(r, CDP_NONE); + } + + if (ret) + break; + } + + return ret; +} + +static void schemata_list_destroy(void) +{ + struct resctrl_schema *s, *tmp; + + list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { + list_del(&s->list); + kfree(s); + } +} + static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); @@ -2116,11 +2155,17 @@ static int rdt_get_tree(struct fs_context *fc) if (ret < 0) goto out_cdp; + ret = schemata_list_create(); + if (ret) { + schemata_list_destroy(); + goto out_mba; + } + closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); if (ret < 0) - goto out_mba; + goto out_schemata_free; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, @@ -2153,7 +2198,7 @@ static int rdt_get_tree(struct fs_context *fc) static_branch_enable_cpuslocked(&rdt_enable_key); if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; list_for_each_entry(dom, &r->domains, list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); } @@ -2170,6 +2215,8 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_schemata_free: + schemata_list_destroy(); out_mba: if (ctx->enable_mba_mbps) set_mba_sc(false); @@ -2257,6 +2304,8 @@ static int rdt_init_fs_context(struct fs_context *fc) static int reset_all_ctrls(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom; struct msr_param msr_param; cpumask_var_t cpu_mask; struct rdt_domain *d; @@ -2267,7 +2316,7 @@ static int reset_all_ctrls(struct rdt_resource *r) msr_param.res = r; msr_param.low = 0; - msr_param.high = r->num_closid; + msr_param.high = hw_res->num_closid; /* * Disable resource control for this resource by setting all @@ -2275,10 +2324,11 @@ static int reset_all_ctrls(struct rdt_resource *r) * from each domain to update the MSRs below. */ list_for_each_entry(d, &r->domains, list) { + hw_dom = resctrl_to_arch_dom(d); cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - for (i = 0; i < r->num_closid; i++) - d->ctrl_val[i] = r->default_ctrl; + for (i = 0; i < hw_res->num_closid; i++) + hw_dom->ctrl_val[i] = r->default_ctrl; } cpu = get_cpu(); /* Update CBM on this cpu if it's in cpu_mask. 
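 *
 * (The remaining CPUs in cpu_mask are reached over IPI just after this
 * hunk, roughly: smp_call_function_many(cpu_mask, rdt_ctrl_update,
 * &msr_param, 1); with rdt_ctrl_update() being the MSR-write callback
 * this code already uses. Sketch only; that code is not part of this
 * diff.)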
*/ @@ -2408,6 +2458,7 @@ static void rdt_kill_sb(struct super_block *sb) rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; + schemata_list_destroy(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); static_branch_disable_cpuslocked(&rdt_enable_key); @@ -2642,23 +2693,24 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) * Set the RDT domain up to start off with all usable allocations. That is, * all shareable and unused bits. All-zero CBM is invalid. */ -static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, +static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, u32 closid) { - struct rdt_resource *r_cdp = NULL; - struct rdt_domain *d_cdp = NULL; + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; u32 used_b = 0, unused_b = 0; unsigned long tmp_cbm; enum rdtgrp_mode mode; - u32 peer_ctl, *ctrl; + u32 peer_ctl, ctrl_val; int i; - rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); - d->have_new_ctrl = false; - d->new_ctrl = r->cache.shareable_bits; + cfg = &d->staged_config[t]; + cfg->have_new_ctrl = false; + cfg->new_ctrl = r->cache.shareable_bits; used_b = r->cache.shareable_bits; - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { + for (i = 0; i < closids_supported(); i++) { if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) @@ -2673,35 +2725,38 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * usage to ensure there is no overlap * with an exclusive group. */ - if (d_cdp) - peer_ctl = d_cdp->ctrl_val[i]; + if (resctrl_arch_get_cdp_enabled(r->rid)) + peer_ctl = resctrl_arch_get_config(r, d, i, + peer_type); else peer_ctl = 0; - used_b |= *ctrl | peer_ctl; + ctrl_val = resctrl_arch_get_config(r, d, i, + s->conf_type); + used_b |= ctrl_val | peer_ctl; if (mode == RDT_MODE_SHAREABLE) - d->new_ctrl |= *ctrl | peer_ctl; + cfg->new_ctrl |= ctrl_val | peer_ctl; } } if (d->plr && d->plr->cbm > 0) used_b |= d->plr->cbm; unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); unused_b &= BIT_MASK(r->cache.cbm_len) - 1; - d->new_ctrl |= unused_b; + cfg->new_ctrl |= unused_b; /* * Force the initial CBM to be valid, user can * modify the CBM based on system availability. */ - d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); + cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); /* * Assign the u32 CBM to an unsigned long to ensure that * bitmap_weight() does not access out-of-bound memory. */ - tmp_cbm = d->new_ctrl; + tmp_cbm = cfg->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); return -ENOSPC; } - d->have_new_ctrl = true; + cfg->have_new_ctrl = true; return 0; } @@ -2716,13 +2771,13 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * If there are no more shareable bits available on any domain then * the entire allocation will fail. 
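 *
 * Worked example (hypothetical 8-bit CBM, shareable_bits = 0x03): if
 * closid 1 already owns 0xf0 exclusively, used_b becomes 0xf3, so
 * unused_b = 0xf3 ^ 0xff = 0x0c and the new group starts from
 * new_ctrl = 0x03 | 0x0c = 0x0f, a contiguous mask that
 * cbm_ensure_valid() accepts.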
*/ -static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) +static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { struct rdt_domain *d; int ret; - list_for_each_entry(d, &r->domains, list) { - ret = __init_one_rdt_domain(d, r, closid); + list_for_each_entry(d, &s->res->domains, list) { + ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; } @@ -2733,30 +2788,34 @@ static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) /* Initialize MBA resource with default values. */ static void rdtgroup_init_mba(struct rdt_resource *r) { + struct resctrl_staged_config *cfg; struct rdt_domain *d; list_for_each_entry(d, &r->domains, list) { - d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; - d->have_new_ctrl = true; + cfg = &d->staged_config[CDP_NONE]; + cfg->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; + cfg->have_new_ctrl = true; } } /* Initialize the RDT group's allocations. */ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) { + struct resctrl_schema *s; struct rdt_resource *r; int ret; - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; if (r->rid == RDT_RESOURCE_MBA) { rdtgroup_init_mba(r); } else { - ret = rdtgroup_init_cat(r, rdtgrp->closid); + ret = rdtgroup_init_cat(s, rdtgrp->closid); if (ret < 0) return ret; } - ret = update_domains(r, rdtgrp->closid); + ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; @@ -3124,13 +3183,13 @@ out: static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) { - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) seq_puts(seq, ",cdp"); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA])) + if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) seq_puts(seq, ",mba_MBps"); return 0; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 67f590425d90..d8c64dab0efe 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -432,7 +432,7 @@ SYM_FUNC_START(early_ignore_irq) pushl 32(%esp) pushl 40(%esp) pushl $int_msg - call printk + call _printk call dump_stack diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 282b4ee1339f..15aefa3f3e18 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -235,15 +235,15 @@ static char irq_trigger[2]; */ static void restore_ELCR(char *trigger) { - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); + outb(trigger[0], PIC_ELCR1); + outb(trigger[1], PIC_ELCR2); } static void save_ELCR(char *trigger) { /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; + trigger[0] = inb(PIC_ELCR1) & 0xF8; + trigger[1] = inb(PIC_ELCR2) & 0xDE; } static void i8259A_resume(void) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a26643dc6bd6..b656456c3a94 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -884,10 +884,11 @@ static void kvm_wait(u8 *ptr, u8 val) } else { local_irq_disable(); + /* safe_halt() will enable IRQ */ if (READ_ONCE(*ptr) == val) safe_halt(); - - local_irq_enable(); + else + local_irq_enable(); } } diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index aa15132228da..525876e7b9f4 100644 --- a/arch/x86/kernel/ldt.c +++ 
b/arch/x86/kernel/ldt.c @@ -154,7 +154,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) if (num_entries > LDT_ENTRIES) return NULL; - new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT); if (!new_ldt) return NULL; @@ -168,9 +168,9 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) * than PAGE_SIZE. */ if (alloc_size > PAGE_SIZE) - new_ldt->entries = vzalloc(alloc_size); + new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); else - new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!new_ldt->entries) { kfree(new_ldt); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 8f06449aab27..fed721f90116 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -19,6 +19,7 @@ #include <linux/smp.h> #include <linux/pci.h> +#include <asm/i8259.h> #include <asm/io_apic.h> #include <asm/acpi.h> #include <asm/irqdomain.h> @@ -251,7 +252,7 @@ static int __init ELCR_trigger(unsigned int irq) { unsigned int port; - port = 0x4d0 + (irq >> 3); + port = PIC_ELCR1 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index ebfb91108232..0a40df66a40d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -388,10 +388,11 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, { /* Handle problems with rebooting on the OptiPlex 990. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell OptiPlex 990 BIOS A0x", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + DMI_MATCH(DMI_BIOS_VERSION, "A0"), }, }, { /* Handle problems with rebooting on Dell 300's */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bff3a784aec5..79f164141116 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -5,6 +5,7 @@ * This file contains the setup_arch() code, which handles the architecture-dependent * parts of early kernel initialization. 
*/ +#include <linux/acpi.h> #include <linux/console.h> #include <linux/crash_dump.h> #include <linux/dma-map-ops.h> @@ -572,16 +573,6 @@ void __init reserve_standard_io_resources(void) } -static __init void reserve_ibft_region(void) -{ - unsigned long addr, size = 0; - - addr = find_ibft_region(&size); - - if (size) - memblock_reserve(addr, size); -} - static bool __init snb_gfx_workaround_needed(void) { #ifdef CONFIG_PCI diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 78a32b956e81..5afd98559193 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_free_ptr(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 06743ec054d2..b52407c56000 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -34,7 +34,13 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(NSIGSYS != 2); /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(siginfo_t) != 128); BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + + /* This is a part of the ABI and can never change in alignment */ + BUILD_BUG_ON(__alignof__(siginfo_t) != 8); + BUILD_BUG_ON(__alignof__(compat_siginfo_t) != 4); + /* * The offsets of all the (unioned) si_fields are fixed * in the ABI, of course. Make sure none of them ever diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9320285a5e29..85f6e242b6b4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -610,6 +610,9 @@ void set_cpu_sibling_map(int cpu) if (threads > __max_smt_threads) __max_smt_threads = threads; + for_each_cpu(i, topology_sibling_cpumask(cpu)) + cpu_data(i).smt_active = threads > 1; + /* * This needs a separate iteration over the cpus because we rely on all * topology_sibling_cpumask links to be set-up. @@ -1552,8 +1555,13 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, topology_die_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_die_cpumask(sibling)); - for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) { cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1) + cpu_data(sibling).smt_active = false; + } + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); cpumask_clear(cpu_llc_shared_mask(cpu)); diff --git a/arch/x86/kernel/sysfb.c b/arch/x86/kernel/sysfb.c deleted file mode 100644 index 014ebd8ca869..000000000000 --- a/arch/x86/kernel/sysfb.c +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Generic System Framebuffers on x86 - * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> - */ - -/* - * Simple-Framebuffer support for x86 systems - * Create a platform-device for any available boot framebuffer. The - * simple-framebuffer platform device is already available on DT systems, so - * this module parses the global "screen_info" object and creates a suitable - * platform device compatible with the "simple-framebuffer" DT object. 
If - * the framebuffer is incompatible, we instead create a legacy - * "vesa-framebuffer", "efi-framebuffer" or "platform-framebuffer" device and - * pass the screen_info as platform_data. This allows legacy drivers - * to pick these devices up without messing with simple-framebuffer drivers. - * The global "screen_info" is still valid at all times. - * - * If CONFIG_X86_SYSFB is not selected, we never register "simple-framebuffer" - * platform devices, but only use legacy framebuffer devices for - * backwards compatibility. - * - * TODO: We set the dev_id field of all platform-devices to 0. This allows - * other x86 OF/DT parsers to create such devices, too. However, they must - * start at offset 1 for this to work. - */ - -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/platform_data/simplefb.h> -#include <linux/platform_device.h> -#include <linux/screen_info.h> -#include <asm/sysfb.h> - -static __init int sysfb_init(void) -{ - struct screen_info *si = &screen_info; - struct simplefb_platform_data mode; - struct platform_device *pd; - const char *name; - bool compatible; - int ret; - - sysfb_apply_efi_quirks(); - - /* try to create a simple-framebuffer device */ - compatible = parse_mode(si, &mode); - if (compatible) { - ret = create_simplefb(si, &mode); - if (!ret) - return 0; - } - - /* if the FB is incompatible, create a legacy framebuffer device */ - if (si->orig_video_isVGA == VIDEO_TYPE_EFI) - name = "efi-framebuffer"; - else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) - name = "vesa-framebuffer"; - else - name = "platform-framebuffer"; - - pd = platform_device_register_resndata(NULL, name, 0, - NULL, 0, si, sizeof(*si)); - return PTR_ERR_OR_ZERO(pd); -} - -/* must execute after PCI subsystem for EFI quirks */ -device_initcall(sysfb_init); diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c deleted file mode 100644 index 8a56a6d80098..000000000000 --- a/arch/x86/kernel/sysfb_efi.c +++ /dev/null @@ -1,284 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Generic System Framebuffers on x86 - * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> - * - * EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com> - */ - -/* - * EFI Quirks - * Several EFI systems do not correctly advertise their boot framebuffers. - * Hence, we use this static table of known broken machines and fix up the - * information so framebuffer drivers can load correctly. 
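 *
 * (Illustration, using values from the table below: the MacBookAir3,1
 * entry carries only OVERRIDE_STRIDE with stride = 2048 * 4, so
 * choose_value() replaces lfb_linelength unconditionally, while the
 * firmware-reported base, width and height are kept unless they are
 * zero.)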
- */ - -#include <linux/dmi.h> -#include <linux/err.h> -#include <linux/efi.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/pci.h> -#include <linux/screen_info.h> -#include <video/vga.h> - -#include <asm/efi.h> -#include <asm/sysfb.h> - -enum { - OVERRIDE_NONE = 0x0, - OVERRIDE_BASE = 0x1, - OVERRIDE_STRIDE = 0x2, - OVERRIDE_HEIGHT = 0x4, - OVERRIDE_WIDTH = 0x8, -}; - -struct efifb_dmi_info efifb_dmi_list[] = { - [M_I17] = { "i17", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, - [M_I20] = { "i20", 0x80010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, /* guess */ - [M_I20_SR] = { "imac7", 0x40010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, - [M_I24] = { "i24", 0x80010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, /* guess */ - [M_I24_8_1] = { "imac8", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, - [M_I24_10_1] = { "imac10", 0xc0010000, 2048 * 4, 1920, 1080, OVERRIDE_NONE }, - [M_I27_11_1] = { "imac11", 0xc0010000, 2560 * 4, 2560, 1440, OVERRIDE_NONE }, - [M_MINI]= { "mini", 0x80000000, 2048 * 4, 1024, 768, OVERRIDE_NONE }, - [M_MINI_3_1] = { "mini31", 0x40010000, 1024 * 4, 1024, 768, OVERRIDE_NONE }, - [M_MINI_4_1] = { "mini41", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, - [M_MB] = { "macbook", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, - [M_MB_5_1] = { "macbook51", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, - [M_MB_6_1] = { "macbook61", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, - [M_MB_7_1] = { "macbook71", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, - [M_MBA] = { "mba", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, - /* 11" Macbook Air 3,1 passes the wrong stride */ - [M_MBA_3] = { "mba3", 0, 2048 * 4, 0, 0, OVERRIDE_STRIDE }, - [M_MBP] = { "mbp", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, - [M_MBP_2] = { "mbp2", 0, 0, 0, 0, OVERRIDE_NONE }, /* placeholder */ - [M_MBP_2_2] = { "mbp22", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, - [M_MBP_SR] = { "mbp3", 0x80030000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, - [M_MBP_4] = { "mbp4", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, - [M_MBP_5_1] = { "mbp51", 0xc0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, - [M_MBP_5_2] = { "mbp52", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, - [M_MBP_5_3] = { "mbp53", 0xd0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, - [M_MBP_6_1] = { "mbp61", 0x90030000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, - [M_MBP_6_2] = { "mbp62", 0x90030000, 2048 * 4, 1680, 1050, OVERRIDE_NONE }, - [M_MBP_7_1] = { "mbp71", 0xc0010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, - [M_MBP_8_2] = { "mbp82", 0x90010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, - [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } -}; - -void efifb_setup_from_dmi(struct screen_info *si, const char *opt) -{ - int i; - - for (i = 0; i < M_UNKNOWN; i++) { - if (efifb_dmi_list[i].base != 0 && - !strcmp(opt, efifb_dmi_list[i].optname)) { - si->lfb_base = efifb_dmi_list[i].base; - si->lfb_linelength = efifb_dmi_list[i].stride; - si->lfb_width = efifb_dmi_list[i].width; - si->lfb_height = efifb_dmi_list[i].height; - } - } -} - -#define choose_value(dmivalue, fwvalue, field, flags) ({ \ - typeof(fwvalue) _ret_ = fwvalue; \ - if ((flags) & (field)) \ - _ret_ = dmivalue; \ - else if ((fwvalue) == 0) \ - _ret_ = dmivalue; \ - _ret_; \ - }) - -static int __init efifb_set_system(const struct dmi_system_id *id) -{ - struct efifb_dmi_info *info = id->driver_data; - - if (info->base == 0 && info->height == 0 && info->width == 0 && - info->stride == 0) - return 0; - - /* Trust 
the bootloader over the DMI tables */ - if (screen_info.lfb_base == 0) { -#if defined(CONFIG_PCI) - struct pci_dev *dev = NULL; - int found_bar = 0; -#endif - if (info->base) { - screen_info.lfb_base = choose_value(info->base, - screen_info.lfb_base, OVERRIDE_BASE, - info->flags); - -#if defined(CONFIG_PCI) - /* make sure that the address in the table is actually - * on a VGA device's PCI BAR */ - - for_each_pci_dev(dev) { - int i; - if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - continue; - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - resource_size_t start, end; - unsigned long flags; - - flags = pci_resource_flags(dev, i); - if (!(flags & IORESOURCE_MEM)) - continue; - - if (flags & IORESOURCE_UNSET) - continue; - - if (pci_resource_len(dev, i) == 0) - continue; - - start = pci_resource_start(dev, i); - end = pci_resource_end(dev, i); - if (screen_info.lfb_base >= start && - screen_info.lfb_base < end) { - found_bar = 1; - break; - } - } - } - if (!found_bar) - screen_info.lfb_base = 0; -#endif - } - } - if (screen_info.lfb_base) { - screen_info.lfb_linelength = choose_value(info->stride, - screen_info.lfb_linelength, OVERRIDE_STRIDE, - info->flags); - screen_info.lfb_width = choose_value(info->width, - screen_info.lfb_width, OVERRIDE_WIDTH, - info->flags); - screen_info.lfb_height = choose_value(info->height, - screen_info.lfb_height, OVERRIDE_HEIGHT, - info->flags); - if (screen_info.orig_video_isVGA == 0) - screen_info.orig_video_isVGA = VIDEO_TYPE_EFI; - } else { - screen_info.lfb_linelength = 0; - screen_info.lfb_width = 0; - screen_info.lfb_height = 0; - screen_info.orig_video_isVGA = 0; - return 0; - } - - printk(KERN_INFO "efifb: dmi detected %s - framebuffer at 0x%08x " - "(%dx%d, stride %d)\n", id->ident, - screen_info.lfb_base, screen_info.lfb_width, - screen_info.lfb_height, screen_info.lfb_linelength); - - return 1; -} - -#define EFIFB_DMI_SYSTEM_ID(vendor, name, enumid) \ - { \ - efifb_set_system, \ - name, \ - { \ - DMI_MATCH(DMI_BIOS_VENDOR, vendor), \ - DMI_MATCH(DMI_PRODUCT_NAME, name) \ - }, \ - &efifb_dmi_list[enumid] \ - } - -static const struct dmi_system_id efifb_dmi_system_table[] __initconst = { - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac4,1", M_I17), - /* At least one of these two will be right; maybe both? */ - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac5,1", M_I20), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac5,1", M_I20), - /* At least one of these two will be right; maybe both? */ - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac6,1", M_I24), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac6,1", M_I24), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac7,1", M_I20_SR), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac8,1", M_I24_8_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac10,1", M_I24_10_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac11,1", M_I27_11_1), - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "Macmini1,1", M_MINI), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini3,1", M_MINI_3_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini4,1", M_MINI_4_1), - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook1,1", M_MB), - /* At least one of these two will be right; maybe both? */ - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook2,1", M_MB), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook2,1", M_MB), - /* At least one of these two will be right; maybe both? 
*/ - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook3,1", M_MB), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook3,1", M_MB), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook4,1", M_MB), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook5,1", M_MB_5_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook6,1", M_MB_6_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook7,1", M_MB_7_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir1,1", M_MBA), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir3,1", M_MBA_3), - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro1,1", M_MBP), - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,1", M_MBP_2), - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,2", M_MBP_2_2), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro2,1", M_MBP_2), - EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro3,1", M_MBP_SR), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro3,1", M_MBP_SR), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro4,1", M_MBP_4), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,1", M_MBP_5_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,2", M_MBP_5_2), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,3", M_MBP_5_3), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,1", M_MBP_6_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,2", M_MBP_6_2), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro7,1", M_MBP_7_1), - EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro8,2", M_MBP_8_2), - {}, -}; - -/* - * Some devices have a portrait LCD but advertise a landscape resolution (and - * pitch). We simply swap width and height for these devices so that we can - * correctly deal with some of them coming with multiple resolutions. - */ -static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = { - { - /* - * Lenovo MIIX310-10ICR, only some batches have the troublesome - * 800x1280 portrait screen. Luckily the portrait version has - * its own BIOS version, so we match on that. - */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"), - DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1HCN44WW"), - }, - }, - { - /* Lenovo MIIX 320-10ICR with 800x1280 portrait screen */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, - "Lenovo MIIX 320-10ICR"), - }, - }, - { - /* Lenovo D330 with 800x1280 or 1200x1920 portrait screen */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, - "Lenovo ideapad D330-10IGM"), - }, - }, - {}, -}; - -__init void sysfb_apply_efi_quirks(void) -{ - if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || - !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) - dmi_check_system(efifb_dmi_system_table); - - if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && - dmi_check_system(efifb_dmi_swap_width_height)) { - u16 temp = screen_info.lfb_width; - - screen_info.lfb_width = screen_info.lfb_height; - screen_info.lfb_height = temp; - screen_info.lfb_linelength = 4 * screen_info.lfb_width; - } -} diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c deleted file mode 100644 index 298fc1edd9c9..000000000000 --- a/arch/x86/kernel/sysfb_simplefb.c +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Generic System Framebuffers on x86 - * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> - */ - -/* - * simple-framebuffer probing - * Try to convert "screen_info" into a "simple-framebuffer" compatible mode. 
- * If the mode is incompatible, we return "false" and let the caller create - * legacy nodes instead. - */ - -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/platform_data/simplefb.h> -#include <linux/platform_device.h> -#include <linux/screen_info.h> -#include <asm/sysfb.h> - -static const char simplefb_resname[] = "BOOTFB"; -static const struct simplefb_format formats[] = SIMPLEFB_FORMATS; - -/* try parsing x86 screen_info into a simple-framebuffer mode struct */ -__init bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode) -{ - const struct simplefb_format *f; - __u8 type; - unsigned int i; - - type = si->orig_video_isVGA; - if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI) - return false; - - for (i = 0; i < ARRAY_SIZE(formats); ++i) { - f = &formats[i]; - if (si->lfb_depth == f->bits_per_pixel && - si->red_size == f->red.length && - si->red_pos == f->red.offset && - si->green_size == f->green.length && - si->green_pos == f->green.offset && - si->blue_size == f->blue.length && - si->blue_pos == f->blue.offset && - si->rsvd_size == f->transp.length && - si->rsvd_pos == f->transp.offset) { - mode->format = f->name; - mode->width = si->lfb_width; - mode->height = si->lfb_height; - mode->stride = si->lfb_linelength; - return true; - } - } - - return false; -} - -__init int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) -{ - struct platform_device *pd; - struct resource res; - u64 base, size; - u32 length; - - /* - * If the 64BIT_BASE capability is set, ext_lfb_base will contain the - * upper half of the base address. Assemble the address, then make sure - * it is valid and we can actually access it. - */ - base = si->lfb_base; - if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) - base |= (u64)si->ext_lfb_base << 32; - if (!base || (u64)(resource_size_t)base != base) { - printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); - return -EINVAL; - } - - /* - * Don't use lfb_size as IORESOURCE size, since it may contain the - * entire VMEM, and thus require huge mappings. Use just the part we - * need, that is, the part where the framebuffer is located. But verify - * that it does not exceed the advertised VMEM. - * Note that in case of VBE, the lfb_size is shifted by 16 bits for - * historical reasons. - */ - size = si->lfb_size; - if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) - size <<= 16; - length = mode->height * mode->stride; - if (length > size) { - printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); - return -EINVAL; - } - length = PAGE_ALIGN(length); - - /* setup IORESOURCE_MEM as framebuffer memory */ - memset(&res, 0, sizeof(res)); - res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; - res.name = simplefb_resname; - res.start = base; - res.end = res.start + length - 1; - if (res.end <= res.start) - return -EINVAL; - - pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0, - &res, 1, mode, sizeof(*mode)); - return PTR_ERR_OR_ZERO(pd); -}
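A worked example of the 64-bit base assembly in the removed create_simplefb(), with illustrative values rather than ones from a real machine: given lfb_base = 0x80000000, ext_lfb_base = 0x1 and VIDEO_CAPABILITY_64BIT_BASE set, the code computes base = 0x80000000 | (0x1ULL << 32) = 0x180000000, rejects it only if it does not fit in resource_size_t, and then uses it as the start of the "BOOTFB" memory resource.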