diff options
Diffstat (limited to 'arch')
330 files changed, 10239 insertions, 5435 deletions
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index a83bbea62c67..0131a7058778 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -63,7 +63,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) return res >= 0 ? 1 : 0; } -static inline void __down_write(struct rw_semaphore *sem) +static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -83,10 +83,24 @@ static inline void __down_write(struct rw_semaphore *sem) :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); #endif - if (unlikely(oldcount)) + return oldcount; +} + +static inline void __down_write(struct rw_semaphore *sem) +{ + if (unlikely(___down_write(sem))) rwsem_down_write_failed(sem); } +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + if (unlikely(___down_write(sem))) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + + return 0; +} + /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 0827d594b1f0..cd0cd5fd09a3 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -106,7 +106,7 @@ pmc: pmc@fffffc00 { compatible = "atmel,at91sam9x5-pmc", "syscon"; - reg = <0xfffffc00 0x100>; + reg = <0xfffffc00 0x200>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; interrupt-controller; #address-cells = <1>; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 78996bdbd3df..9817090c1b73 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -280,7 +280,7 @@ status = "disabled"; nfc@c0000000 { - compatible = "atmel,sama5d4-nfc"; + compatible = "atmel,sama5d3-nfc"; #address-cells = <1>; #size-cells = <1>; reg = < /* NFC Command Registers */ diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index e0eea72deb87..a708fa1f0905 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -17,34 +17,28 @@ #include <asm/mach/map.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> +#include <asm/ptrace.h> #ifdef CONFIG_EFI void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define efi_call_virt(f, ...) \ -({ \ - efi_##f##_t *__f; \ - efi_status_t __s; \ - \ - efi_virtmap_load(); \ - __f = efi.systab->runtime->f; \ - __s = __f(__VA_ARGS__); \ - efi_virtmap_unload(); \ - __s; \ -}) +#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_teardown() efi_virtmap_unload() -#define __efi_call_virt(f, ...) \ +#define arch_efi_call_virt(f, args...) \ ({ \ efi_##f##_t *__f; \ - \ - efi_virtmap_load(); \ __f = efi.systab->runtime->f; \ - __f(__VA_ARGS__); \ - efi_virtmap_unload(); \ + __f(args); \ }) +#define ARCH_EFI_IRQ_FLAGS_MASK \ + (PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \ + PSR_T_BIT | MODE_MASK) + static inline void efi_set_pgd(struct mm_struct *mm) { check_and_switch_context(mm, NULL); @@ -59,7 +53,16 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define __efi_call_early(f, ...) f(__VA_ARGS__) +#define efi_is_64bit() (false) + +struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg); +void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si); + +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) +{ +} /* * A reasonable upper bound for the uncompressed kernel size is 32 MBytes, diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 385070180c25..738d5eee91de 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -265,6 +265,15 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t phys_idmap_start) +{ + /* + * TODO + * kvm_call_reset(boot_pgd_ptr, phys_idmap_start); + */ +} + static inline int kvm_arch_dev_ioctl_check_extension(long ext) { return 0; @@ -277,7 +286,6 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index da44be9db4fa..f17a8d41822c 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -66,6 +66,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); +phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index fa5b42d44985..3cc14dd8587c 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -15,6 +15,7 @@ #include <linux/compiler.h> #include <linux/sched.h> +#include <linux/preempt.h> #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/proc-fns.h> @@ -66,6 +67,7 @@ static inline void check_and_switch_context(struct mm_struct *mm, cpu_switch_mm(mm->pgd, mm); } +#ifndef MODULE #define finish_arch_post_lock_switch \ finish_arch_post_lock_switch static inline void finish_arch_post_lock_switch(void) @@ -87,6 +89,7 @@ static inline void finish_arch_post_lock_switch(void) preempt_enable_no_resched(); } } +#endif /* !MODULE */ #endif /* CONFIG_MMU */ diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c index ff8a9d8acfac..9f43ba012d10 100644 --- a/arch/arm/kernel/efi.c +++ b/arch/arm/kernel/efi.c @@ -11,6 +11,41 @@ #include <asm/mach/map.h> #include <asm/mmu_context.h> +static int __init set_permissions(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = *ptep; + + if (md->attribute & EFI_MEMORY_RO) + pte = set_pte_bit(pte, __pgprot(L_PTE_RDONLY)); + if (md->attribute & EFI_MEMORY_XP) + pte = set_pte_bit(pte, __pgprot(L_PTE_XN)); + set_pte_ext(ptep, pte, PTE_EXT_NG); + return 0; +} + +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + unsigned long base, size; + + base = md->virt_addr; + size = md->num_pages << EFI_PAGE_SHIFT; + + /* + * We can only use apply_to_page_range() if we can guarantee that the + * entire region was mapped using pages. This should be the case if the + * region does not cover any naturally aligned SECTION_SIZE sized + * blocks. + */ + if (round_down(base + size, SECTION_SIZE) < + round_up(base, SECTION_SIZE) + SECTION_SIZE) + return apply_to_page_range(mm, base, size, set_permissions, md); + + return 0; +} + int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { struct map_desc desc = { @@ -34,5 +69,11 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) desc.type = MT_DEVICE; create_mapping_late(mm, &desc, true); + + /* + * If stricter permissions were specified, apply them now. + */ + if (md->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP)) + return efi_set_mapping_permissions(mm, md); return 0; } diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 6284779d64ee..b8df45883cf7 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) info->address &= ~alignment_mask; info->ctrl.len <<= offset; - if (!bp->overflow_handler) { + if (is_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) + if (is_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); unlock: diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 4e02ae5950ff..27563befa8a2 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) tail = (struct frame_tail __user *)regs->ARM_fp - 1; - while ((entry->nr < PERF_MAX_STACK_DEPTH) && + while ((entry->nr < sysctl_perf_event_max_stack) && tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 2c4bea39cf22..7d4e2850910c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -883,7 +883,8 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&ioport_resource, &lp2); } -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) +#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) || \ + defined(CONFIG_EFI) struct screen_info screen_info = { .orig_video_lines = 30, .orig_video_cols = 80, diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index dded1b763c16..9ef013d86cc5 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -16,7 +16,6 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include <linux/cpu.h> #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/err.h> @@ -66,6 +65,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; +static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -90,11 +91,6 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void) -{ - return 0; -} - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -1033,11 +1029,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } } -static void cpu_init_stage2(void *dummy) -{ - __cpu_init_stage2(); -} - static void cpu_init_hyp_mode(void *dummy) { phys_addr_t boot_pgd_ptr; @@ -1065,43 +1056,87 @@ static void cpu_hyp_reinit(void) { if (is_kernel_in_hyp_mode()) { /* - * cpu_init_stage2() is safe to call even if the PM + * __cpu_init_stage2() is safe to call even if the PM * event was cancelled before the CPU was reset. */ - cpu_init_stage2(NULL); + __cpu_init_stage2(); } else { if (__hyp_get_vectors() == hyp_default_vectors) cpu_init_hyp_mode(NULL); } } -static int hyp_init_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) +static void cpu_hyp_reset(void) +{ + phys_addr_t boot_pgd_ptr; + phys_addr_t phys_idmap_start; + + if (!is_kernel_in_hyp_mode()) { + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + phys_idmap_start = kvm_get_idmap_start(); + + __cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start); + } +} + +static void _kvm_arch_hardware_enable(void *discard) { - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: + if (!__this_cpu_read(kvm_arm_hardware_enabled)) { cpu_hyp_reinit(); + __this_cpu_write(kvm_arm_hardware_enabled, 1); } +} + +int kvm_arch_hardware_enable(void) +{ + _kvm_arch_hardware_enable(NULL); + return 0; +} - return NOTIFY_OK; +static void _kvm_arch_hardware_disable(void *discard) +{ + if (__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reset(); + __this_cpu_write(kvm_arm_hardware_enabled, 0); + } } -static struct notifier_block hyp_init_cpu_nb = { - .notifier_call = hyp_init_cpu_notify, -}; +void kvm_arch_hardware_disable(void) +{ + _kvm_arch_hardware_disable(NULL); +} #ifdef CONFIG_CPU_PM static int hyp_init_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { - cpu_hyp_reinit(); + /* + * kvm_arm_hardware_enabled is left with its old value over + * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should + * re-enable hyp. + */ + switch (cmd) { + case CPU_PM_ENTER: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* + * don't update kvm_arm_hardware_enabled here + * so that the hardware will be re-enabled + * when we resume. See below. + */ + cpu_hyp_reset(); + + return NOTIFY_OK; + case CPU_PM_EXIT: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* The hardware was enabled before suspend. */ + cpu_hyp_reinit(); + return NOTIFY_OK; - } - return NOTIFY_DONE; + default: + return NOTIFY_DONE; + } } static struct notifier_block hyp_init_cpu_pm_nb = { @@ -1143,16 +1178,12 @@ static int init_common_resources(void) static int init_subsystems(void) { - int err; + int err = 0; /* - * Register CPU Hotplug notifier + * Enable hardware so that subsystem initialisation can access EL2. */ - err = register_cpu_notifier(&hyp_init_cpu_nb); - if (err) { - kvm_err("Cannot register KVM init CPU notifier (%d)\n", err); - return err; - } + on_each_cpu(_kvm_arch_hardware_enable, NULL, 1); /* * Register CPU lower-power notifier @@ -1170,9 +1201,10 @@ static int init_subsystems(void) case -ENODEV: case -ENXIO: vgic_present = false; + err = 0; break; default: - return err; + goto out; } /* @@ -1180,12 +1212,15 @@ static int init_subsystems(void) */ err = kvm_timer_hyp_init(); if (err) - return err; + goto out; kvm_perf_init(); kvm_coproc_table_init(); - return 0; +out: + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + + return err; } static void teardown_hyp_mode(void) @@ -1198,17 +1233,11 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - unregister_cpu_notifier(&hyp_init_cpu_nb); hyp_cpu_pm_exit(); } static int init_vhe_mode(void) { - /* - * Execute the init code on each CPU. - */ - on_each_cpu(cpu_init_stage2, NULL, 1); - /* set size of VMID supported by CPU */ kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); @@ -1295,11 +1324,6 @@ static int init_hyp_mode(void) } } - /* - * Execute the init code on each CPU. - */ - on_each_cpu(cpu_init_hyp_mode, NULL, 1); - #ifndef CONFIG_HOTPLUG_CPU free_boot_hyp_pgd(); #endif diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d6d4191e68f2..be302128c5d7 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1666,6 +1666,11 @@ phys_addr_t kvm_get_idmap_vector(void) return hyp_idmap_vector; } +phys_addr_t kvm_get_idmap_start(void) +{ + return hyp_idmap_start; +} + int kvm_mmu_init(void) { int err; diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 725e693639d2..add3771d38f6 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -751,16 +751,6 @@ static struct resource da8xx_mmcsd0_resources[] = { .end = IRQ_DA8XX_MMCSDINT0, .flags = IORESOURCE_IRQ, }, - { /* DMA RX */ - .start = DA8XX_DMA_MMCSD0_RX, - .end = DA8XX_DMA_MMCSD0_RX, - .flags = IORESOURCE_DMA, - }, - { /* DMA TX */ - .start = DA8XX_DMA_MMCSD0_TX, - .end = DA8XX_DMA_MMCSD0_TX, - .flags = IORESOURCE_DMA, - }, }; static struct platform_device da8xx_mmcsd0_device = { @@ -788,16 +778,6 @@ static struct resource da850_mmcsd1_resources[] = { .end = IRQ_DA850_MMCSDINT0_1, .flags = IORESOURCE_IRQ, }, - { /* DMA RX */ - .start = DA850_DMA_MMCSD1_RX, - .end = DA850_DMA_MMCSD1_RX, - .flags = IORESOURCE_DMA, - }, - { /* DMA TX */ - .start = DA850_DMA_MMCSD1_TX, - .end = DA850_DMA_MMCSD1_TX, - .flags = IORESOURCE_DMA, - }, }; static struct platform_device da850_mmcsd1_device = { diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 6257aa452568..67d26c5bda0b 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -144,14 +144,6 @@ static struct resource mmcsd0_resources[] = { .start = IRQ_SDIOINT, .flags = IORESOURCE_IRQ, }, - /* DMA channels: RX, then TX */ - { - .start = EDMA_CTLR_CHAN(0, DAVINCI_DMA_MMCRXEVT), - .flags = IORESOURCE_DMA, - }, { - .start = EDMA_CTLR_CHAN(0, DAVINCI_DMA_MMCTXEVT), - .flags = IORESOURCE_DMA, - }, }; static struct platform_device davinci_mmcsd0_device = { @@ -181,14 +173,6 @@ static struct resource mmcsd1_resources[] = { .start = IRQ_DM355_SDIOINT1, .flags = IORESOURCE_IRQ, }, - /* DMA channels: RX, then TX */ - { - .start = EDMA_CTLR_CHAN(0, 30), /* rx */ - .flags = IORESOURCE_DMA, - }, { - .start = EDMA_CTLR_CHAN(0, 31), /* tx */ - .flags = IORESOURCE_DMA, - }, }; static struct platform_device davinci_mmcsd1_device = { diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index 575195be6687..65e1817d8afe 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -38,6 +38,8 @@ extern void socfpga_init_clocks(void); extern void socfpga_sysmgr_init(void); void socfpga_init_l2_ecc(void); void socfpga_init_ocram_ecc(void); +void socfpga_init_arria10_l2_ecc(void); +void socfpga_init_arria10_ocram_ecc(void); extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c index e3907ab58d05..4267c95f2158 100644 --- a/arch/arm/mach-socfpga/l2_cache.c +++ b/arch/arm/mach-socfpga/l2_cache.c @@ -17,6 +17,20 @@ #include <linux/of_platform.h> #include <linux/of_address.h> +#include "core.h" + +/* A10 System Manager L2 ECC Control register */ +#define A10_MPU_CTRL_L2_ECC_OFST 0x0 +#define A10_MPU_CTRL_L2_ECC_EN BIT(0) + +/* A10 System Manager Global IRQ Mask register */ +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_CLR_L2 BIT(0) + +/* A10 System Manager L2 ECC IRQ Clear register */ +#define A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST 0xA8 +#define A10_SYSMGR_MPU_CLEAR_L2_ECC (BIT(31) | BIT(15)) + void socfpga_init_l2_ecc(void) { struct device_node *np; @@ -39,3 +53,38 @@ void socfpga_init_l2_ecc(void) writel(0x01, mapped_l2_edac_addr); iounmap(mapped_l2_edac_addr); } + +void socfpga_init_arria10_l2_ecc(void) +{ + struct device_node *np; + void __iomem *mapped_l2_edac_addr; + + /* Find the L2 EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-l2-ecc"); + if (!np) { + pr_err("Unable to find socfpga-a10-l2-ecc in dtb\n"); + return; + } + + mapped_l2_edac_addr = of_iomap(np, 0); + of_node_put(np); + if (!mapped_l2_edac_addr) { + pr_err("Unable to find L2 ECC mapping in dtb\n"); + return; + } + + if (!sys_manager_base_addr) { + pr_err("System Mananger not mapped for L2 ECC\n"); + goto exit; + } + /* Clear any pending IRQs */ + writel(A10_SYSMGR_MPU_CLEAR_L2_ECC, (sys_manager_base_addr + + A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST)); + /* Enable ECC */ + writel(A10_SYSMGR_ECC_INTMASK_CLR_L2, sys_manager_base_addr + + A10_SYSMGR_ECC_INTMASK_CLR_OFST); + writel(A10_MPU_CTRL_L2_ECC_EN, mapped_l2_edac_addr + + A10_MPU_CTRL_L2_ECC_OFST); +exit: + iounmap(mapped_l2_edac_addr); +} diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c index 60ec643ac2be..10d673252395 100644 --- a/arch/arm/mach-socfpga/ocram.c +++ b/arch/arm/mach-socfpga/ocram.c @@ -13,12 +13,15 @@ * You should have received a copy of the GNU General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/delay.h> #include <linux/io.h> #include <linux/genalloc.h> #include <linux/module.h> #include <linux/of_address.h> #include <linux/of_platform.h> +#include "core.h" + #define ALTR_OCRAM_CLEAR_ECC 0x00000018 #define ALTR_OCRAM_ECC_EN 0x00000019 @@ -47,3 +50,133 @@ void socfpga_init_ocram_ecc(void) iounmap(mapped_ocr_edac_addr); } + +/* Arria10 OCRAM Section */ +#define ALTR_A10_ECC_CTRL_OFST 0x08 +#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) +#define ALTR_A10_ECC_INITA BIT(16) + +#define ALTR_A10_ECC_INITSTAT_OFST 0x0C +#define ALTR_A10_ECC_INITCOMPLETEA BIT(0) +#define ALTR_A10_ECC_INITCOMPLETEB BIT(8) + +#define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_SERRINTEN BIT(0) + +#define ALTR_A10_ECC_INTSTAT_OFST 0x20 +#define ALTR_A10_ECC_SERRPENA BIT(0) +#define ALTR_A10_ECC_DERRPENA BIT(8) +#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \ + ALTR_A10_ECC_DERRPENA) +/* ECC Manager Defines */ +#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) + +#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 + +static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value |= bit_mask; + writel(value, ioaddr); +} + +static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value &= ~bit_mask; + writel(value, ioaddr); +} + +static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + return (value & bit_mask) ? 1 : 0; +} + +/* + * This function uses the memory initialization block in the Arria10 ECC + * controller to initialize/clear the entire memory data and ECC data. + */ +static int altr_init_memory_port(void __iomem *ioaddr) +{ + int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US; + + ecc_set_bits(ALTR_A10_ECC_INITA, (ioaddr + ALTR_A10_ECC_CTRL_OFST)); + while (limit--) { + if (ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, + (ioaddr + ALTR_A10_ECC_INITSTAT_OFST))) + break; + udelay(1); + } + if (limit < 0) + return -EBUSY; + + /* Clear any pending ECC interrupts */ + writel(ALTR_A10_ECC_ERRPENA_MASK, + (ioaddr + ALTR_A10_ECC_INTSTAT_OFST)); + + return 0; +} + +void socfpga_init_arria10_ocram_ecc(void) +{ + struct device_node *np; + int ret = 0; + void __iomem *ecc_block_base; + + if (!sys_manager_base_addr) { + pr_err("SOCFPGA: sys-mgr is not initialized\n"); + return; + } + + /* Find the OCRAM EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ocram-ecc"); + if (!np) { + pr_err("Unable to find socfpga-a10-ocram-ecc\n"); + return; + } + + /* Map the ECC Block */ + ecc_block_base = of_iomap(np, 0); + of_node_put(np); + if (!ecc_block_base) { + pr_err("Unable to map OCRAM ECC block\n"); + return; + } + + /* Disable ECC */ + writel(ALTR_A10_OCRAM_ECC_EN_CTL, + sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_SET_OFST); + ecc_clear_bits(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST)); + ecc_clear_bits(ALTR_A10_OCRAM_ECC_EN_CTL, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + + /* Ensure all writes complete */ + wmb(); + + /* Use HW initialization block to initialize memory for ECC */ + ret = altr_init_memory_port(ecc_block_base); + if (ret) { + pr_err("ECC: cannot init OCRAM PORTA memory\n"); + goto exit; + } + + /* Enable ECC */ + ecc_set_bits(ALTR_A10_OCRAM_ECC_EN_CTL, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + ecc_set_bits(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST)); + writel(ALTR_A10_OCRAM_ECC_EN_CTL, + sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_CLR_OFST); + + /* Ensure all writes complete */ + wmb(); +exit: + iounmap(ecc_block_base); +} diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index 7e0aad2ec3d1..dde14f7bf2c3 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -66,6 +66,16 @@ static void __init socfpga_init_irq(void) socfpga_init_ocram_ecc(); } +static void __init socfpga_arria10_init_irq(void) +{ + irqchip_init(); + socfpga_sysmgr_init(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C)) + socfpga_init_arria10_l2_ecc(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM)) + socfpga_init_arria10_ocram_ecc(); +} + static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) { u32 temp; @@ -113,7 +123,7 @@ static const char *altera_a10_dt_match[] = { DT_MACHINE_START(SOCFPGA_A10, "Altera SOCFPGA Arria10") .l2c_aux_val = 0, .l2c_aux_mask = ~0, - .init_irq = socfpga_init_irq, + .init_irq = socfpga_arria10_init_irq, .restart = socfpga_arria10_restart, .dt_compat = altera_a10_dt_match, MACHINE_END diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4f436220384f..8845c0d100d7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS @@ -58,11 +59,14 @@ config ARM64 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARM_SMCCC select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL + select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG @@ -76,6 +80,7 @@ config ARM64 select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP if NUMA select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -89,15 +94,13 @@ config ARM64 select NO_BOOTMEM select OF select OF_EARLY_FLATTREE + select OF_NUMA if NUMA && OF select OF_RESERVED_MEM select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY - select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE - select HAVE_CONTEXT_TRACKING - select HAVE_ARM_SMCCC help ARM 64-bit (AArch64) Linux support. @@ -546,10 +549,35 @@ config HOTPLUG_CPU Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. +# Common NUMA Features +config NUMA + bool "Numa Memory Allocation and Scheduler Support" + depends on SMP + help + Enable NUMA (Non Uniform Memory Access) support. + + The kernel will try to allocate memory used by a CPU on the + local memory of the CPU and add some more + NUMA awareness to the kernel. + +config NODES_SHIFT + int "Maximum NUMA Nodes (as a power of 2)" + range 1 10 + default "2" + depends on NEED_MULTIPLE_NODES + help + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accommodate various tables. + +config USE_PERCPU_NUMA_NODE_ID + def_bool y + depends on NUMA + source kernel/Kconfig.preempt source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on !HIBERNATION def_bool y config ARCH_HAS_HOLES_MEMORYMODEL @@ -578,9 +606,6 @@ config SYS_SUPPORTS_HUGETLBFS config ARCH_WANT_HUGE_PMD_SHARE def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) -config HAVE_ARCH_TRANSPARENT_HUGEPAGE - def_bool y - config ARCH_HAS_CACHE_LINE_SIZE def_bool y @@ -953,6 +978,14 @@ menu "Power management options" source "kernel/power/Kconfig" +config ARCH_HIBERNATION_POSSIBLE + def_bool y + depends on CPU_PM + +config ARCH_HIBERNATION_HEADER + def_bool y + depends on HIBERNATION + config ARCH_SUSPEND_POSSIBLE def_bool y diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 7e76845a0434..710fde4ad0f0 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -59,7 +59,7 @@ config DEBUG_RODATA If in doubt, say Y config DEBUG_ALIGN_RODATA - depends on DEBUG_RODATA && ARM64_4K_PAGES + depends on DEBUG_RODATA bool "Align linker sections up to SECTION_SIZE" help If this option is enabled, sections that may potentially be marked as diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index efa77c146415..521b1ec59157 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -2,6 +2,7 @@ menu "Platform selection" config ARCH_SUNXI bool "Allwinner sunxi 64-bit SoC Family" + select GENERIC_IRQ_CHIP help This enables support for Allwinner sunxi based SoCs like the A64. diff --git a/arch/arm64/boot/dts/broadcom/vulcan.dtsi b/arch/arm64/boot/dts/broadcom/vulcan.dtsi index 85820e2bca9d..34e11a9db2a0 100644 --- a/arch/arm64/boot/dts/broadcom/vulcan.dtsi +++ b/arch/arm64/boot/dts/broadcom/vulcan.dtsi @@ -86,7 +86,7 @@ }; pmu { - compatible = "arm,armv8-pmuv3"; + compatible = "brcm,vulcan-pmu", "arm,armv8-pmuv3"; interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_HIGH>; /* PMU overflow */ }; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 70f7b9e04598..10b017c4bdd8 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -1,5 +1,5 @@ /* - * Based on arch/arm/include/asm/assembler.h + * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S * * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. @@ -23,22 +23,13 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/thread_info.h> /* - * Stack pushing/popping (register pairs only). Equivalent to store decrement - * before, load increment after. - */ - .macro push, xreg1, xreg2 - stp \xreg1, \xreg2, [sp, #-16]! - .endm - - .macro pop, xreg1, xreg2 - ldp \xreg1, \xreg2, [sp], #16 - .endm - -/* * Enable and disable interrupts. */ .macro disable_irq @@ -212,6 +203,102 @@ lr .req x30 // link register .endm /* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. + */ + .macro dcache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register. + */ + .macro icache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + */ + .macro tcr_set_idmap_t0sz, valreg, tmpreg +#ifndef CONFIG_ARM64_VA_BITS_48 + ldr_l \tmpreg, idmap_t0sz + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH +#endif + .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm + +/* + * copy_page - copy src to dest using temp registers t1-t8 + */ + .macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req +9998: ldp \t1, \t2, [\src] + ldp \t3, \t4, [\src, #16] + ldp \t5, \t6, [\src, #32] + ldp \t7, \t8, [\src, #48] + add \src, \src, #64 + stnp \t1, \t2, [\dest] + stnp \t3, \t4, [\dest, #16] + stnp \t5, \t6, [\dest, #32] + stnp \t7, \t8, [\dest, #48] + add \dest, \dest, #64 + tst \src, #(PAGE_SIZE - 1) + b.ne 9998b + .endm + +/* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. */ @@ -233,4 +320,24 @@ lr .req x30 // link register .long \sym\()_hi32 .endm + /* + * mov_q - move an immediate constant into a 64-bit register using + * between 2 and 4 movz/movk instructions (depending on the + * magnitude and sign of the operand) + */ + .macro mov_q, reg, val + .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff) + movz \reg, :abs_g1_s:\val + .else + .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff) + movz \reg, :abs_g2_s:\val + .else + movz \reg, :abs_g3:\val + movk \reg, :abs_g2_nc:\val + .endif + movk \reg, :abs_g1_nc:\val + .endif + movk \reg, :abs_g0_nc:\val + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b9b649422fca..224efe730e46 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -35,8 +35,9 @@ #define ARM64_ALT_PAN_NOT_UAO 10 #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 +#define ARM64_HAS_32BIT_EL0 13 -#define ARM64_NCAPS 13 +#define ARM64_NCAPS 14 #ifndef __ASSEMBLY__ @@ -77,10 +78,17 @@ struct arm64_ftr_reg { struct arm64_ftr_bits *ftr_bits; }; +/* scope of capability check */ +enum { + SCOPE_SYSTEM, + SCOPE_LOCAL_CPU, +}; + struct arm64_cpu_capabilities { const char *desc; u16 capability; - bool (*matches)(const struct arm64_cpu_capabilities *); + int def_scope; /* default scope */ + bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); void (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ @@ -101,6 +109,8 @@ struct arm64_cpu_capabilities { extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +bool this_cpu_has_cap(unsigned int cap); + static inline bool cpu_have_feature(unsigned int num) { return elf_hwcap & (1UL << num); @@ -170,12 +180,20 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; } +static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); + + return val == ID_AA64PFR0_EL0_32BIT_64BIT; +} + void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); +void verify_local_cpu_errata(void); void verify_local_cpu_capabilities(void); u64 read_system_reg(u32 id); @@ -185,6 +203,11 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } +static inline bool system_supports_32bit_el0(void) +{ + return cpus_have_cap(ARM64_HAS_32BIT_EL0); +} + static inline bool system_supports_mixed_endian_el0(void) { return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 8e88a696c9cb..622db3c6474e 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -4,6 +4,7 @@ #include <asm/io.h> #include <asm/mmu_context.h> #include <asm/neon.h> +#include <asm/ptrace.h> #include <asm/tlbflush.h> #ifdef CONFIG_EFI @@ -14,32 +15,29 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); -#define efi_call_virt(f, ...) \ +#define efi_set_mapping_permissions efi_create_mapping + +#define arch_efi_call_virt_setup() \ ({ \ - efi_##f##_t *__f; \ - efi_status_t __s; \ - \ kernel_neon_begin(); \ efi_virtmap_load(); \ - __f = efi.systab->runtime->f; \ - __s = __f(__VA_ARGS__); \ - efi_virtmap_unload(); \ - kernel_neon_end(); \ - __s; \ }) -#define __efi_call_virt(f, ...) \ +#define arch_efi_call_virt(f, args...) \ ({ \ efi_##f##_t *__f; \ - \ - kernel_neon_begin(); \ - efi_virtmap_load(); \ __f = efi.systab->runtime->f; \ - __f(__VA_ARGS__); \ + __f(args); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ efi_virtmap_unload(); \ kernel_neon_end(); \ }) +#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) + /* arch specific definitions used by the stub code */ /* @@ -50,7 +48,16 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ #define MAX_FDT_OFFSET SZ_512M -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define __efi_call_early(f, ...) f(__VA_ARGS__) +#define efi_is_64bit() (true) + +#define alloc_screen_info(x...) &screen_info +#define free_screen_info(x...) + +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) +{ +} #define EFI_ALLOC_ALIGN SZ_64K diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 24ed037f09fd..7a09c48c0475 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -177,7 +177,8 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; /* AArch32 EABI. */ #define EF_ARM_EABI_MASK 0xff000000 -#define compat_elf_check_arch(x) (((x)->e_machine == EM_ARM) && \ +#define compat_elf_check_arch(x) (system_supports_32bit_el0() && \ + ((x)->e_machine == EM_ARM) && \ ((x)->e_flags & EF_ARM_EABI_MASK)) #define compat_start_thread compat_start_thread diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 5c6375d8528b..7e51d1b57c0c 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -19,6 +19,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H +#include <asm/sparsemem.h> /* * The linear mapping and the start of memory are both 2M aligned (per @@ -86,10 +87,24 @@ * (64k granule), or a multiple that can be mapped using contiguous bits * in the page tables: 32 * PMD_SIZE (16k granule) */ -#ifdef CONFIG_ARM64_64K_PAGES -#define ARM64_MEMSTART_ALIGN SZ_512M +#if defined(CONFIG_ARM64_4K_PAGES) +#define ARM64_MEMSTART_SHIFT PUD_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5) #else -#define ARM64_MEMSTART_ALIGN SZ_1G +#define ARM64_MEMSTART_SHIFT PMD_SHIFT +#endif + +/* + * sparsemem vmemmap imposes an additional requirement on the alignment of + * memstart_addr, due to the fact that the base of the vmemmap region + * has a direct correspondence, and needs to appear sufficiently aligned + * in the virtual address space. + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS +#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) +#else +#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) #endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3f29887995bc..1b3dc9df5257 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -84,17 +84,6 @@ #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -/* Hyp System Control Register (SCTLR_EL2) bits */ -#define SCTLR_EL2_EE (1 << 25) -#define SCTLR_EL2_WXN (1 << 19) -#define SCTLR_EL2_I (1 << 12) -#define SCTLR_EL2_SA (1 << 3) -#define SCTLR_EL2_C (1 << 2) -#define SCTLR_EL2_A (1 << 1) -#define SCTLR_EL2_M 1 -#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ - SCTLR_EL2_SA | SCTLR_EL2_I) - /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 40a0a24e6c98..7561f63f1c28 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -22,6 +22,8 @@ #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE 2 #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) @@ -40,6 +42,7 @@ struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; +extern char __kvm_hyp_reset[]; extern char __kvm_hyp_vector[]; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f5c6bd2541ef..90a8d2336ceb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,8 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); +unsigned long kvm_hyp_reset_entry(void); +void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_arch { /* The VMID generation used for the virt. memory system */ @@ -352,7 +354,17 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } -static inline void kvm_arch_hardware_disable(void) {} +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t phys_idmap_start) +{ + /* + * Call reset code, and switch back to stub hyp vectors. + * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation. + */ + __kvm_call_hyp((void *)kvm_hyp_reset_entry(), + boot_pgd_ptr, phys_idmap_start); +} + static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 22732a5e3119..e8d39d4f86b6 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -109,6 +109,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); +phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 12f8a00fb3f1..72a3025bb583 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -40,6 +40,21 @@ #define PCI_IO_SIZE SZ_16M /* + * Log2 of the upper bound of the size of a struct page. Used for sizing + * the vmemmap region only, does not affect actual memory footprint. + * We don't use sizeof(struct page) directly since taking its size here + * requires its definition to be available at this point in the inclusion + * chain, and it may not be a power of 2 in the first place. + */ +#define STRUCT_PAGE_MAX_SHIFT 6 + +/* + * VMEMMAP_SIZE - allows the whole linear region to be covered by + * a struct page array + */ +#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)) + +/* * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. @@ -54,7 +69,8 @@ #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) #define MODULES_VSIZE (SZ_128M) -#define PCI_IO_END (PAGE_OFFSET - SZ_2M) +#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) +#define PCI_IO_END (VMEMMAP_START - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) @@ -71,6 +87,9 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) +#define KERNEL_START _text +#define KERNEL_END _end + /* * The size of the KASAN shadow region. This should be 1/8th of the * size of the entire kernel virtual address space. @@ -192,9 +211,19 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) +#ifndef CONFIG_SPARSEMEM_VMEMMAP #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#else +#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) +#define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) +#define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) +#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) + +#define virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ + + PHYS_OFFSET) >> PAGE_SHIFT) +#endif #endif #include <asm-generic/memory_model.h> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 990124a67eeb..97b1d8f26b9c 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -29,6 +29,7 @@ typedef struct { #define ASID(mm) ((mm)->context.id.counter & 0xffff) extern void paging_init(void); +extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h new file mode 100644 index 000000000000..a0de9e6ba73f --- /dev/null +++ b/arch/arm64/include/asm/mmzone.h @@ -0,0 +1,12 @@ +#ifndef __ASM_MMZONE_H +#define __ASM_MMZONE_H + +#ifdef CONFIG_NUMA + +#include <asm/numa.h> + +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[(nid)]) + +#endif /* CONFIG_NUMA */ +#endif /* __ASM_MMZONE_H */ diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h new file mode 100644 index 000000000000..e9b4f2942335 --- /dev/null +++ b/arch/arm64/include/asm/numa.h @@ -0,0 +1,45 @@ +#ifndef __ASM_NUMA_H +#define __ASM_NUMA_H + +#include <asm/topology.h> + +#ifdef CONFIG_NUMA + +/* currently, arm64 implements flat NUMA topology */ +#define parent_node(node) (node) + +int __node_distance(int from, int to); +#define node_distance(a, b) __node_distance(a, b) + +extern nodemask_t numa_nodes_parsed __initdata; + +/* Mappings between node number and cpus on that node. */ +extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; +void numa_clear_node(unsigned int cpu); + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +const struct cpumask *cpumask_of_node(int node); +#else +/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return node_to_cpumask_map[node]; +} +#endif + +void __init arm64_numa_init(void); +int __init numa_add_memblk(int nodeid, u64 start, u64 end); +void __init numa_set_distance(int from, int to, int distance); +void __init numa_free_distance(void); +void __init early_map_cpu_to_node(unsigned int cpu, int nid); +void numa_store_cpu_info(unsigned int cpu); + +#else /* CONFIG_NUMA */ + +static inline void numa_store_cpu_info(unsigned int cpu) { } +static inline void arm64_numa_init(void) { } +static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { } + +#endif /* CONFIG_NUMA */ + +#endif /* __ASM_NUMA_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index ae615b9d9a55..17b45f7d96d3 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -19,6 +19,8 @@ #ifndef __ASM_PAGE_H #define __ASM_PAGE_H +#include <linux/const.h> + /* PAGE_SHIFT determines the page size */ /* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 5c25b831273d..9786f770088d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -133,7 +133,6 @@ * Section */ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 2b1bd7e52c3b..69b2fd41503c 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -27,10 +27,6 @@ typedef u64 pmdval_t; typedef u64 pudval_t; typedef u64 pgdval_t; -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS - /* * These are used to make use of C type-checking.. */ @@ -58,34 +54,6 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) -#else /* !STRICT_MM_TYPECHECKS */ - -typedef pteval_t pte_t; -#define pte_val(x) (x) -#define __pte(x) (x) - -#if CONFIG_PGTABLE_LEVELS > 2 -typedef pmdval_t pmd_t; -#define pmd_val(x) (x) -#define __pmd(x) (x) -#endif - -#if CONFIG_PGTABLE_LEVELS > 3 -typedef pudval_t pud_t; -#define pud_val(x) (x) -#define __pud(x) (x) -#endif - -typedef pgdval_t pgd_t; -#define pgd_val(x) (x) -#define __pgd(x) (x) - -typedef pteval_t pgprot_t; -#define pgprot_val(x) (x) -#define __pgprot(x) (x) - -#endif /* STRICT_MM_TYPECHECKS */ - #if CONFIG_PGTABLE_LEVELS == 2 #include <asm-generic/pgtable-nopmd.h> #elif CONFIG_PGTABLE_LEVELS == 3 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 989fef16d461..2da46ae9c991 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -24,22 +24,16 @@ #include <asm/pgtable-prot.h> /* - * VMALLOC and SPARSEMEM_VMEMMAP ranges. + * VMALLOC range. * - * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array - * (rounded up to PUD_SIZE). * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, - * fixed mappings and modules + * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space + * and fixed mappings */ -#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) - #define VMALLOC_START (MODULES_END) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) -#define VMEMMAP_START (VMALLOC_END + SZ_64K) -#define vmemmap ((struct page *)VMEMMAP_START - \ - SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT)) +#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL @@ -58,7 +52,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) +#define ZERO_PAGE(vaddr) pfn_to_page(PHYS_PFN(__pa(empty_zero_page))) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) @@ -272,6 +266,21 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot) return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); } +#ifdef CONFIG_NUMA_BALANCING +/* + * See the comment in include/asm-generic/pgtable.h + */ +static inline int pte_protnone(pte_t pte) +{ + return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; +} + +static inline int pmd_protnone(pmd_t pmd) +{ + return pte_protnone(pmd_pte(pmd)); +} +#endif + /* * THP definitions. */ @@ -280,15 +289,16 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot) #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) -#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) @@ -327,9 +337,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (pmd_val(pmd)) -#define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) +#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT)) #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_TABLE) @@ -394,7 +403,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!(pud_val(pud) & 2)) +#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) (pud_val(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) @@ -526,6 +535,21 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #ifdef CONFIG_ARM64_HW_AFDBM +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +extern int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); +} +#endif + /* * Atomic pte/pmd modifications. */ @@ -578,9 +602,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) { return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); } diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 817a067ba058..433e50405274 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -113,6 +113,17 @@ static inline void update_cpu_boot_status(int val) dsb(ishst); } +/* + * The calling secondary CPU has detected serious configuration mismatch, + * which calls for a kernel panic. Update the boot status and park the calling + * CPU. + */ +static inline void cpu_panic_kernel(void) +{ + update_cpu_boot_status(CPU_PANIC_KERNEL); + cpu_park_loop(); +} + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 59a5b0f1e81c..024d623f662e 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -1,7 +1,8 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 11 +#define NR_CTX_REGS 10 +#define NR_CALLEE_SAVED_REGS 12 /* * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on @@ -16,11 +17,34 @@ struct cpu_suspend_ctx { u64 sp; } __aligned(16); -struct sleep_save_sp { - phys_addr_t *save_ptr_stash; - phys_addr_t save_ptr_stash_phys; +/* + * Memory to save the cpu state is allocated on the stack by + * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter(). + * This data must survive until cpu_resume() is called. + * + * This struct desribes the size and the layout of the saved cpu state. + * The layout of the callee_saved_regs is defined by the implementation + * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed + * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it + * returns, and the data would be subsequently corrupted by the call to the + * finisher. + */ +struct sleep_stack_data { + struct cpu_suspend_ctx system_regs; + unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS]; }; +extern unsigned long *sleep_save_stash; + extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); +int __cpu_suspend_enter(struct sleep_stack_data *state); +void __cpu_suspend_exit(void); +void _cpu_resume(void); + +int swsusp_arch_suspend(void); +int swsusp_arch_resume(void); +int arch_hibernation_header_save(void *addr, unsigned int max_size); +int arch_hibernation_header_restore(void *addr); + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 12874164b0ae..751e901c8d37 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -86,10 +86,21 @@ #define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ (!!x)<<8 | 0x1f) -/* SCTLR_EL1 */ -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) -#define SCTLR_EL1_SPAN (0x1 << 23) +/* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_I (1 << 12) +#define SCTLR_ELx_SA (1 << 3) +#define SCTLR_ELx_C (1 << 2) +#define SCTLR_ELx_A (1 << 1) +#define SCTLR_ELx_M 1 + +#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I) + +/* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_CP15BEN (1 << 5) /* id_aa64isar0 */ @@ -115,6 +126,7 @@ #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 #define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 @@ -145,7 +157,11 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_LVA_SHIFT 16 +#define ID_AA64MMFR2_IESB_SHIFT 12 +#define ID_AA64MMFR2_LSM_SHIFT 8 #define ID_AA64MMFR2_UAO_SHIFT 4 +#define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index a3e9d6fdbf21..8b57339823e9 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -22,6 +22,16 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +#ifdef CONFIG_NUMA + +struct pci_bus; +int pcibus_to_node(struct pci_bus *bus); +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + +#endif /* CONFIG_NUMA */ + #include <asm-generic/topology.h> #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 9f22dd607958..dcbcf8dcbefb 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,6 +18,22 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H +/* + * The arm64 hcall implementation uses x0 to specify the hcall type. A value + * less than 0xfff indicates a special hcall, such as get/set vector. + * Any other value is used as a pointer to the function to call. + */ + +/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */ +#define HVC_GET_VECTORS 0 + +/* + * HVC_SET_VECTORS - Set the value of the vbar_el2 register. + * + * @x1: Physical address of the new vector table. + */ +#define HVC_SET_VECTORS 1 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) @@ -60,6 +76,12 @@ static inline bool is_kernel_in_hyp_mode(void) return el == CurrentEL_EL2; } +#ifdef CONFIG_ARM64_VHE +extern void verify_cpu_run_el(void); +#else +static inline void verify_cpu_run_el(void) {} +#endif + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 3793003e16a2..2173149d8954 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index d1ce8e2f98b9..3e4f1a45b125 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -42,6 +42,7 @@ int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; +static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; static int __init parse_acpi(char *arg) @@ -52,6 +53,8 @@ static int __init parse_acpi(char *arg) /* "acpi=off" disables both ACPI table parsing and interpreter */ if (strcmp(arg, "off") == 0) param_acpi_off = true; + else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */ + param_acpi_on = true; else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ param_acpi_force = true; else @@ -66,12 +69,24 @@ static int __init dt_scan_depth1_nodes(unsigned long node, void *data) { /* - * Return 1 as soon as we encounter a node at depth 1 that is - * not the /chosen node. + * Ignore anything not directly under the root node; we'll + * catch its parent instead. */ - if (depth == 1 && (strcmp(uname, "chosen") != 0)) - return 1; - return 0; + if (depth != 1) + return 0; + + if (strcmp(uname, "chosen") == 0) + return 0; + + if (strcmp(uname, "hypervisor") == 0 && + of_flat_dt_is_compatible(node, "xen,xen")) + return 0; + + /* + * This node at depth 1 is neither a chosen node nor a xen node, + * which we do not expect. + */ + return 1; } /* @@ -184,11 +199,13 @@ void __init acpi_boot_table_init(void) /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or - * - the device tree is not empty (it has more than just a /chosen node) - * and ACPI has not been force enabled (acpi=force) + * - the device tree is not empty (it has more than just a /chosen node, + * and a /hypervisor node when running on Xen) + * and ACPI has not been [force] enabled (acpi=on|force) */ if (param_acpi_off || - (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL))) + (!param_acpi_on && !param_acpi_force && + of_scan_flat_dt(dt_scan_depth1_nodes, NULL))) return; /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 3ae6b310ac9b..f8e5d47f0880 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -22,6 +22,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> +#include <linux/suspend.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> @@ -119,11 +120,14 @@ int main(void) DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); - DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); - DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); - DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); + DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); + DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + BLANK(); + DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); + DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); + DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); return 0; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 06afd04e02c0..d42789499f17 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -22,14 +22,16 @@ #include <asm/cpufeature.h> static bool __maybe_unused -is_affected_midr_range(const struct arm64_cpu_capabilities *entry) +is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, entry->midr_range_min, entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ + .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ @@ -101,6 +103,26 @@ const struct arm64_cpu_capabilities arm64_errata[] = { } }; +/* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU requires + * an errata not detected at boot, fail this CPU. + */ +void verify_local_cpu_errata(void) +{ + const struct arm64_cpu_capabilities *caps = arm64_errata; + + for (; caps->matches; caps++) + if (!cpus_have_cap(caps->capability) && + caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: Requires work around for %s, not detected" + " at boot time\n", + smp_processor_id(), + caps->desc ? : "an erratum"); + cpu_die_early(); + } +} + void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 943f5140e0f3..811773d1c1d0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -71,7 +71,8 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); /* meta feature for alternatives */ static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), @@ -130,7 +131,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -435,22 +440,26 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); - init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); - init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); - init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); - init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); - init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); - init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); - init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); - init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); - init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); - init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); - init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); - init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); - init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); - init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); - init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); - init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + } + } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -555,47 +564,51 @@ void update_cpu_features(int cpu, info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); /* - * If we have AArch32, we care about 32-bit features for compat. These - * registers should be RES0 otherwise. + * If we have AArch32, we care about 32-bit features for compat. + * If the system doesn't support AArch32, don't update them. */ - taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); - taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, info->reg_id_isar0, boot->reg_id_isar0); - taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, info->reg_id_isar1, boot->reg_id_isar1); - taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, info->reg_id_isar2, boot->reg_id_isar2); - taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, info->reg_id_isar3, boot->reg_id_isar3); - taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, info->reg_id_isar4, boot->reg_id_isar4); - taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, info->reg_id_isar5, boot->reg_id_isar5); - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, info->reg_id_mmfr0, boot->reg_id_mmfr0); - taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, info->reg_id_mmfr1, boot->reg_id_mmfr1); - taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, info->reg_id_mmfr2, boot->reg_id_mmfr2); - taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, info->reg_id_mmfr3, boot->reg_id_mmfr3); - taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, info->reg_id_pfr0, boot->reg_id_pfr0); - taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, info->reg_id_pfr1, boot->reg_id_pfr1); - taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, info->reg_mvfr0, boot->reg_mvfr0); - taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, info->reg_mvfr1, boot->reg_mvfr1); - taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, info->reg_mvfr2, boot->reg_mvfr2); + } /* * Mismatched CPU features are a recipe for disaster. Don't even @@ -614,6 +627,49 @@ u64 read_system_reg(u32 id) return regp->sys_val; } +/* + * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * Read the system register on the current CPU + */ +static u64 __raw_read_system_reg(u32 sys_id) +{ + switch (sys_id) { + case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); + default: + BUG(); + return 0; + } +} + #include <linux/irqchip/arm-gic-v3.h> static bool @@ -625,19 +681,24 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) } static bool -has_cpuid_feature(const struct arm64_cpu_capabilities *entry) +has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) { u64 val; - val = read_system_reg(entry->sys_reg); + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + val = read_system_reg(entry->sys_reg); + else + val = __raw_read_system_reg(entry->sys_reg); + return feature_matches(val, entry); } -static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) +static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope) { bool has_sre; - if (!has_cpuid_feature(entry)) + if (!has_cpuid_feature(entry, scope)) return false; has_sre = gic_enable_sre(); @@ -648,7 +709,7 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } -static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused) { u32 midr = read_cpuid_id(); u32 rv_min, rv_max; @@ -660,7 +721,7 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); } @@ -669,6 +730,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, + .def_scope = SCOPE_SYSTEM, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -679,6 +741,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -691,6 +754,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -701,12 +765,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, + .def_scope = SCOPE_SYSTEM, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -717,20 +783,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, + .def_scope = SCOPE_SYSTEM, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, + .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, }, + { + .desc = "32-bit EL0 Support", + .capability = ARM64_HAS_32BIT_EL0, + .def_scope = SCOPE_SYSTEM, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR0_EL0_SHIFT, + .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + }, {}, }; #define HWCAP_CAP(reg, field, s, min_value, type, cap) \ { \ .desc = #cap, \ + .def_scope = SCOPE_SYSTEM, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ @@ -740,7 +819,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap = cap, \ } -static const struct arm64_cpu_capabilities arm64_hwcaps[] = { +static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), @@ -751,6 +830,10 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + {}, +}; + +static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), @@ -761,7 +844,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -782,7 +865,7 @@ static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) } /* Check if we have a particular HWCAP enabled */ -static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) { bool rc; @@ -806,28 +889,23 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void __init setup_cpu_hwcaps(void) +static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { - int i; - const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; - - for (i = 0; hwcaps[i].matches; i++) - if (hwcaps[i].matches(&hwcaps[i])) - cap_set_hwcap(&hwcaps[i]); + for (; hwcaps->matches; hwcaps++) + if (hwcaps->matches(hwcaps, hwcaps->def_scope)) + cap_set_elf_hwcap(hwcaps); } void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { - int i; - - for (i = 0; caps[i].matches; i++) { - if (!caps[i].matches(&caps[i])) + for (; caps->matches; caps++) { + if (!caps->matches(caps, caps->def_scope)) continue; - if (!cpus_have_cap(caps[i].capability) && caps[i].desc) - pr_info("%s %s\n", info, caps[i].desc); - cpus_set_cap(caps[i].capability); + if (!cpus_have_cap(caps->capability) && caps->desc) + pr_info("%s %s\n", info, caps->desc); + cpus_set_cap(caps->capability); } } @@ -838,11 +916,9 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { - int i; - - for (i = 0; caps[i].matches; i++) - if (caps[i].enable && cpus_have_cap(caps[i].capability)) - on_each_cpu(caps[i].enable, NULL, true); + for (; caps->matches; caps++) + if (caps->enable && cpus_have_cap(caps->capability)) + on_each_cpu(caps->enable, NULL, true); } /* @@ -861,54 +937,45 @@ static inline void set_sys_caps_initialised(void) } /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * Check for CPU features that are used in early boot + * based on the Boot CPU value. */ -static u64 __raw_read_system_reg(u32 sys_id) +static void check_early_cpu_features(void) { - switch (sys_id) { - case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + verify_cpu_run_el(); + verify_cpu_asid_bits(); +} - case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); - case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); +static void +verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) +{ - case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); - default: - BUG(); - return 0; - } + for (; caps->matches; caps++) + if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: missing HWCAP: %s\n", + smp_processor_id(), caps->desc); + cpu_die_early(); + } } -/* - * Check for CPU features that are used in early boot - * based on the Boot CPU value. - */ -static void check_early_cpu_features(void) +static void +verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) { - verify_cpu_asid_bits(); + for (; caps->matches; caps++) { + if (!cpus_have_cap(caps->capability)) + continue; + /* + * If the new CPU misses an advertised feature, we cannot proceed + * further, park the cpu. + */ + if (!caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: missing feature: %s\n", + smp_processor_id(), caps->desc); + cpu_die_early(); + } + if (caps->enable) + caps->enable(NULL); + } } /* @@ -921,8 +988,6 @@ static void check_early_cpu_features(void) */ void verify_local_cpu_capabilities(void) { - int i; - const struct arm64_cpu_capabilities *caps; check_early_cpu_features(); @@ -933,32 +998,11 @@ void verify_local_cpu_capabilities(void) if (!sys_caps_initialised) return; - caps = arm64_features; - for (i = 0; caps[i].matches; i++) { - if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu. - */ - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps[i].desc); - cpu_die_early(); - } - if (caps[i].enable) - caps[i].enable(NULL); - } - - for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { - if (!cpus_have_hwcap(&caps[i])) - continue; - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { - pr_crit("CPU%d: missing HWCAP: %s\n", - smp_processor_id(), caps[i].desc); - cpu_die_early(); - } - } + verify_local_cpu_errata(); + verify_local_cpu_features(arm64_features); + verify_local_elf_hwcaps(arm64_elf_hwcaps); + if (system_supports_32bit_el0()) + verify_local_elf_hwcaps(compat_elf_hwcaps); } static void __init setup_feature_capabilities(void) @@ -967,6 +1011,24 @@ static void __init setup_feature_capabilities(void) enable_cpu_capabilities(arm64_features); } +/* + * Check if the current CPU has a given feature capability. + * Should be called from non-preemptible context. + */ +bool this_cpu_has_cap(unsigned int cap) +{ + const struct arm64_cpu_capabilities *caps; + + if (WARN_ON(preemptible())) + return false; + + for (caps = arm64_features; caps->desc; caps++) + if (caps->capability == cap && caps->matches) + return caps->matches(caps, SCOPE_LOCAL_CPU); + + return false; +} + void __init setup_cpu_features(void) { u32 cwg; @@ -974,7 +1036,10 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); - setup_cpu_hwcaps(); + setup_elf_hwcaps(arm64_elf_hwcaps); + + if (system_supports_32bit_el0()) + setup_elf_hwcaps(compat_elf_hwcaps); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); @@ -993,7 +1058,7 @@ void __init setup_cpu_features(void) } static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) { return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); } diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 9047cab68fd3..e11857fce05f 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -19,7 +19,8 @@ int __init arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend && + cpu_ops[cpu]->cpu_init_idle) ret = cpu_ops[cpu]->cpu_init_idle(cpu); return ret; @@ -36,11 +37,5 @@ int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); - /* - * If cpu_ops have not been registered or suspend - * has not been initialized, cpu_suspend call fails early. - */ - if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) - return -EOPNOTSUPP; return cpu_ops[cpu]->cpu_suspend(index); } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 84c8684431c7..3808470486f3 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -87,7 +87,8 @@ static const char *const compat_hwcap_str[] = { "idivt", "vfpd32", "lpae", - "evtstrm" + "evtstrm", + NULL }; static const char *const compat_hwcap2_str[] = { @@ -216,23 +217,26 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); - info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); - - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + /* Update the 32bit ID registers only if AArch32 is implemented */ + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + } cpuinfo_detect_icache_policy(info); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c45f2968bc8c..4fbf3c54275c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,9 +135,8 @@ static void clear_os_lock(void *unused) static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { - int cpu = (unsigned long)data; if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - smp_call_function_single(cpu, clear_os_lock, NULL, 1); + clear_os_lock(NULL); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index cae3112f7791..e88c064b845c 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -62,7 +62,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - movz x21, #:abs_g0:stext_offset + ldr w21, =stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index b6abc852f2a1..78f52488f9ff 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -17,22 +17,51 @@ #include <asm/efi.h> -int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +/* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. Also take the new (optional) RO/XP bits into account. + */ +static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) { - pteval_t prot_val; + u64 attr = md->attribute; + u32 type = md->type; - /* - * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if ((md->attribute & EFI_MEMORY_WB) == 0) - prot_val = PROT_DEVICE_nGnRE; - else if (md->type == EFI_RUNTIME_SERVICES_CODE || - !PAGE_ALIGNED(md->phys_addr)) - prot_val = pgprot_val(PAGE_KERNEL_EXEC); - else - prot_val = pgprot_val(PAGE_KERNEL); + if (type == EFI_MEMORY_MAPPED_IO) + return PROT_DEVICE_nGnRE; + + if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr), + "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?")) + /* + * If the region is not aligned to the page size of the OS, we + * can not use strict permissions, since that would also affect + * the mapping attributes of the adjacent regions. + */ + return pgprot_val(PAGE_KERNEL_EXEC); + + /* R-- */ + if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == + (EFI_MEMORY_XP | EFI_MEMORY_RO)) + return pgprot_val(PAGE_KERNEL_RO); + + /* R-X */ + if (attr & EFI_MEMORY_RO) + return pgprot_val(PAGE_KERNEL_ROX); + + /* RW- */ + if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE) + return pgprot_val(PAGE_KERNEL); + + /* RWX */ + return pgprot_val(PAGE_KERNEL_EXEC); +} + +/* we will fill this structure from the stub, so don't put it in .bss */ +struct screen_info screen_info __section(.data); + +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +{ + pteval_t prot_val = create_mapping_protection(md); create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 85da0f599cd6..2c6e598a94dc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -25,6 +25,7 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/boot.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> #include <asm/cache.h> @@ -51,9 +52,6 @@ #error TEXT_OFFSET must be less than 2MB #endif -#define KERNEL_START _text -#define KERNEL_END _end - /* * Kernel startup entry point. * --------------------------- @@ -102,8 +100,6 @@ _head: #endif #ifdef CONFIG_EFI - .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -123,11 +119,11 @@ optional_header: .short 0x20b // PE32+ format .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion - .long _end - stext // SizeOfCode + .long _end - efi_header_end // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long __efistub_entry - _head // AddressOfEntryPoint - .long __efistub_stext_offset // BaseOfCode + .long efi_header_end - _head // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -144,7 +140,7 @@ extra_header_fields: .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header - .long __efistub_stext_offset // SizeOfHeaders + .long efi_header_end - _head // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -188,10 +184,10 @@ section_table: .byte 0 .byte 0 .byte 0 // end of 0 padding of section name - .long _end - stext // VirtualSize - .long __efistub_stext_offset // VirtualAddress - .long _edata - stext // SizeOfRawData - .long __efistub_stext_offset // PointerToRawData + .long _end - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long _edata - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) @@ -200,20 +196,23 @@ section_table: .long 0xe0500020 // Characteristics (section flags) /* - * EFI will load stext onwards at the 4k section alignment + * EFI will load .text onwards at the 4k section alignment * described in the PE/COFF header. To ensure that instruction * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that stext is + * correctly at this alignment, we must ensure that .text is * placed at a 4k boundary in the Image to begin with. */ .align 12 +efi_header_end: #endif + __INIT + ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET + and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -222,13 +221,11 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, 0f // address to jump to after + bl __cpu_setup // initialise processor + adr_l x27, __primary_switch // address to jump to after // MMU has been enabled - adr_l lr, __enable_mmu // return (PIC) address - b __cpu_setup // initialise processor + b __enable_mmu ENDPROC(stext) - .align 3 -0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -338,7 +335,7 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =SWAPPER_MM_MMUFLAGS + mov x7, SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. @@ -394,12 +391,13 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - ldr x5, =KIMAGE_VADDR + mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr w6, kernel_img_size - add x6, x6, x5 - mov x3, x24 // phys offset + adrp x6, _end // runtime __pa(_end) + adrp x3, _text // runtime __pa(_text) + sub x6, x6, x3 // _end - _text + add x6, x6, x5 // runtime __va(_end) create_block_map x0, x7, x3, x5, x6 /* @@ -414,16 +412,13 @@ __create_page_tables: ret x28 ENDPROC(__create_page_tables) - -kernel_img_size: - .long _end - (_head - TEXT_OFFSET) .ltorg /* * The following fragment of code is executed with the MMU enabled. */ .set initial_sp, init_thread_union + THREAD_START_SP -__mmap_switched: +__primary_switched: mov x28, lr // preserve LR adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address @@ -437,44 +432,6 @@ __mmap_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW -#ifdef CONFIG_RELOCATABLE - - /* - * Iterate over each entry in the relocation table, and apply the - * relocations in place. - */ - adr_l x8, __dynsym_start // start of symbol table - adr_l x9, __reloc_start // start of reloc table - adr_l x10, __reloc_end // end of reloc table - -0: cmp x9, x10 - b.hs 2f - ldp x11, x12, [x9], #24 - ldr x13, [x9, #-8] - cmp w12, #R_AARCH64_RELATIVE - b.ne 1f - add x13, x13, x23 // relocate - str x13, [x11, x23] - b 0b - -1: cmp w12, #R_AARCH64_ABS64 - b.ne 0b - add x12, x12, x12, lsl #1 // symtab offset: 24x top word - add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word - ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx - ldr x15, [x12, #8] // Elf64_Sym::st_value - cmp w14, #-0xf // SHN_ABS (0xfff1) ? - add x14, x15, x23 // relocate - csel x15, x14, x15, ne - add x15, x13, x15 - str x15, [x11, x23] - b 0b - -2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr - dc cvac, x8 // value visible to secondaries - dsb sy // with MMU off -#endif - adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) @@ -490,17 +447,19 @@ __mmap_switched: bl kasan_early_init #endif #ifdef CONFIG_RANDOMIZE_BASE - cbnz x23, 0f // already running randomized? + tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? + b.ne 0f mov x0, x21 // pass FDT address in x0 + mov x1, x23 // pass modulo offset in x1 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed - mov x23, x0 // record KASLR offset + orr x23, x23, x0 // record KASLR offset ret x28 // we must enable KASLR, return // to __enable_mmu() 0: #endif b start_kernel -ENDPROC(__mmap_switched) +ENDPROC(__primary_switched) /* * end early head section, begin head code that is also used for @@ -650,7 +609,7 @@ ENDPROC(el2_setup) * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed * in x20. See arch/arm64/include/asm/virt.h for more info. */ -ENTRY(set_cpu_boot_mode_flag) +set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f @@ -683,7 +642,7 @@ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 - ldr x1, =MPIDR_HWID_BITMASK + mov_q x1, MPIDR_HWID_BITMASK and x0, x0, x1 adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] @@ -703,7 +662,7 @@ ENTRY(secondary_entry) b secondary_startup ENDPROC(secondary_entry) -ENTRY(secondary_startup) +secondary_startup: /* * Common entry point for secondary CPUs. */ @@ -711,14 +670,11 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x8, kimage_vaddr - ldr w9, 0f - sub x27, x8, w9, sxtw // address to jump to after enabling the MMU + adr_l x27, __secondary_switch // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) -0: .long (_text - TEXT_OFFSET) - __secondary_switched -ENTRY(__secondary_switched) +__secondary_switched: adr_l x5, vectors msr vbar_el1, x5 isb @@ -768,7 +724,7 @@ ENTRY(__early_cpu_boot_status) * If it isn't, park the CPU */ .section ".idmap.text", "ax" -__enable_mmu: +ENTRY(__enable_mmu) mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 @@ -806,7 +762,6 @@ __enable_mmu: ic iallu // flush instructions fetched dsb nsh // via old mapping isb - add x27, x27, x23 // relocated __mmap_switched #endif br x27 ENDPROC(__enable_mmu) @@ -819,3 +774,53 @@ __no_granule_support: wfi b 1b ENDPROC(__no_granule_support) + +__primary_switch: +#ifdef CONFIG_RELOCATABLE + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + ldr w8, =__dynsym_offset // offset to symbol table + ldr w9, =__rela_offset // offset to reloc table + ldr w10, =__rela_size // size of reloc table + + mov_q x11, KIMAGE_VADDR // default virtual offset + add x11, x11, x23 // actual virtual offset + add x8, x8, x11 // __va(.dynsym) + add x9, x9, x11 // __va(.rela) + add x10, x9, x10 // __va(.rela) + sizeof(.rela) + +0: cmp x9, x10 + b.hs 2f + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + add x13, x13, x23 // relocate + str x13, [x11, x23] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx + ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne + add x15, x13, x15 + str x15, [x11, x23] + b 0b + +2: +#endif + ldr x8, =__primary_switched + br x8 +ENDPROC(__primary_switch) + +__secondary_switch: + ldr x8, =__secondary_switched + br x8 +ENDPROC(__secondary_switch) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S new file mode 100644 index 000000000000..46f29b6560ec --- /dev/null +++ b/arch/arm64/kernel/hibernate-asm.S @@ -0,0 +1,176 @@ +/* + * Hibernate low-level support + * + * Copyright (C) 2016 ARM Ltd. + * Author: James Morse <james.morse@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <linux/errno.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ +.macro break_before_make_ttbr_switch zero_page, page_table + msr ttbr1_el1, \zero_page + isb + tlbi vmalle1is + dsb ish + msr ttbr1_el1, \page_table + isb +.endm + + +/* + * Resume from hibernate + * + * Loads temporary page tables then restores the memory image. + * Finally branches to cpu_resume() to restore the state saved by + * swsusp_arch_suspend(). + * + * Because this code has to be copied to a 'safe' page, it can't call out to + * other functions by PC-relative address. Also remember that it may be + * mid-way through over-writing other functions. For this reason it contains + * code from flush_icache_range() and uses the copy_page() macro. + * + * This 'safe' page is mapped via ttbr0, and executed from there. This function + * switches to a copy of the linear map in ttbr1, performs the restore, then + * switches ttbr1 to the original kernel's swapper_pg_dir. + * + * All of memory gets written to, including code. We need to clean the kernel + * text to the Point of Coherence (PoC) before secondary cores can be booted. + * Because the kernel modules and executable pages mapped to user space are + * also written as data, we clean all pages we touch to the Point of + * Unification (PoU). + * + * x0: physical address of temporary page tables + * x1: physical address of swapper page tables + * x2: address of cpu_resume + * x3: linear map address of restore_pblist in the current kernel + * x4: physical address of __hyp_stub_vectors, or 0 + * x5: physical address of a zero page that remains zero after resume + */ +.pushsection ".hibernate_exit.text", "ax" +ENTRY(swsusp_arch_suspend_exit) + /* + * We execute from ttbr0, change ttbr1 to our copied linear map tables + * with a break-before-make via the zero page + */ + break_before_make_ttbr_switch x5, x0 + + mov x21, x1 + mov x30, x2 + mov x24, x4 + mov x25, x5 + + /* walk the restore_pblist and use copy_page() to over-write memory */ + mov x19, x3 + +1: ldr x10, [x19, #HIBERN_PBE_ORIG] + mov x0, x10 + ldr x1, [x19, #HIBERN_PBE_ADDR] + + copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 + + add x1, x10, #PAGE_SIZE + /* Clean the copied page to PoU - based on flush_icache_range() */ + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x10, x3 +2: dc cvau, x4 /* clean D line / unified line */ + add x4, x4, x2 + cmp x4, x1 + b.lo 2b + + ldr x19, [x19, #HIBERN_PBE_NEXT] + cbnz x19, 1b + dsb ish /* wait for PoU cleaning to finish */ + + /* switch to the restored kernels page tables */ + break_before_make_ttbr_switch x25, x21 + + ic ialluis + dsb ish + isb + + cbz x24, 3f /* Do we need to re-initialise EL2? */ + hvc #0 +3: ret + + .ltorg +ENDPROC(swsusp_arch_suspend_exit) + +/* + * Restore the hyp stub. + * This must be done before the hibernate page is unmapped by _cpu_resume(), + * but happens before any of the hyp-stub's code is cleaned to PoC. + * + * x24: The physical address of __hyp_stub_vectors + */ +el1_sync: + msr vbar_el2, x24 + eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + +/* el2 vectors - switch el2 here while we restore the memory image. */ + .align 11 +ENTRY(hibernate_el2_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +END(hibernate_el2_vectors) + +.popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c new file mode 100644 index 000000000000..f8df75d740f4 --- /dev/null +++ b/arch/arm64/kernel/hibernate.c @@ -0,0 +1,487 @@ +/*: + * Hibernate support specific for ARM64 + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * License terms: GNU General Public License (GPL) version 2 + */ +#define pr_fmt(x) "hibernate: " x +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/pm.h> +#include <linux/sched.h> +#include <linux/suspend.h> +#include <linux/utsname.h> +#include <linux/version.h> + +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/irqflags.h> +#include <asm/memory.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sections.h> +#include <asm/suspend.h> +#include <asm/virt.h> + +/* + * Hibernate core relies on this value being 0 on resume, and marks it + * __nosavedata assuming it will keep the resume kernel's '0' value. This + * doesn't happen with either KASLR. + * + * defined as "__visible int in_suspend __nosavedata" in + * kernel/power/hibernate.c + */ +extern int in_suspend; + +/* Find a symbols alias in the linear map */ +#define LMADDR(x) phys_to_virt(virt_to_phys(x)) + +/* Do we need to reset el2? */ +#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) + +/* + * Start/end of the hibernate exit code, this must be copied to a 'safe' + * location in memory, and executed from there. + */ +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; + +/* temporary el2 vectors in the __hibernate_exit_text section. */ +extern char hibernate_el2_vectors[]; + +/* hyp-stub vectors, used to restore el2 during resume from hibernate. */ +extern char __hyp_stub_vectors[]; + +/* + * Values that may not change over hibernate/resume. We put the build number + * and date in here so that we guarantee not to resume with a different + * kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/* These values need to be know across a hibernate/restore. */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + + /* These are needed to find the relocated kernel if built with kaslr */ + phys_addr_t ttbr1_el1; + void (*reenter_kernel)(void); + + /* + * We need to know where the __hyp_stub_vectors are after restore to + * re-configure el2. + */ + phys_addr_t __hyp_stub_vectors; +} resume_hdr; + +static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->reenter_kernel = _cpu_resume; + + /* We can't use __hyp_get_vectors() because kvm may still be loaded */ + if (el2_reset_needed()) + hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + else + hdr->__hyp_stub_vectors = 0; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_save); + +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_restore); + +/* + * Copies length bytes, starting at src_start into an new page, + * perform cache maintentance, then maps it at the specified address low + * address as executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr, + void *(*allocator)(gfp_t mask), + gfp_t mask) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long dst = (unsigned long)allocator(mask); + + if (!dst) { + rc = -ENOMEM; + goto out; + } + + memcpy((void *)dst, src_start, length); + flush_icache_range(dst, dst + length); + + pgd = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(*pgd)) { + pud = allocator(mask); + if (!pud) { + rc = -ENOMEM; + goto out; + } + pgd_populate(&init_mm, pgd, pud); + } + + pud = pud_offset(pgd, dst_addr); + if (pud_none(*pud)) { + pmd = allocator(mask); + if (!pmd) { + rc = -ENOMEM; + goto out; + } + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, dst_addr); + if (pmd_none(*pmd)) { + pte = allocator(mask); + if (!pte) { + rc = -ENOMEM; + goto out; + } + pmd_populate_kernel(&init_mm, pmd, pte); + } + + pte = pte_offset_kernel(pmd, dst_addr); + set_pte(pte, __pte(virt_to_phys((void *)dst) | + pgprot_val(PAGE_KERNEL_EXEC))); + + /* Load our new page tables */ + asm volatile("msr ttbr0_el1, %0;" + "isb;" + "tlbi vmalle1is;" + "dsb ish;" + "isb" : : "r"(virt_to_phys(pgd))); + + *phys_dst_addr = virt_to_phys((void *)dst); + +out: + return rc; +} + + +int swsusp_arch_suspend(void) +{ + int ret = 0; + unsigned long flags; + struct sleep_stack_data state; + + local_dbg_save(flags); + + if (__cpu_suspend_enter(&state)) { + ret = swsusp_save(); + } else { + /* Clean kernel to PoC for secondary core startup */ + __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + + /* + * Tell the hibernation core that we've just restored + * the memory + */ + in_suspend = 0; + + __cpu_suspend_exit(); + } + + local_dbg_restore(flags); + + return ret; +} + +static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, + unsigned long end) +{ + pte_t *src_pte; + pte_t *dst_pte; + unsigned long addr = start; + + dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pte) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); + dst_pte = pte_offset_kernel(dst_pmd, start); + + src_pte = pte_offset_kernel(src_pmd, start); + do { + if (!pte_none(*src_pte)) + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmd; + pmd_t *dst_pmd; + unsigned long next; + unsigned long addr = start; + + if (pud_none(*dst_pud)) { + dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmd) + return -ENOMEM; + pud_populate(&init_mm, dst_pud, dst_pmd); + } + dst_pmd = pmd_offset(dst_pud, start); + + src_pmd = pmd_offset(src_pud, start); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*src_pmd)) + continue; + if (pmd_table(*src_pmd)) { + if (copy_pte(dst_pmd, src_pmd, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmd, + __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, + unsigned long end) +{ + pud_t *dst_pud; + pud_t *src_pud; + unsigned long next; + unsigned long addr = start; + + if (pgd_none(*dst_pgd)) { + dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pud) + return -ENOMEM; + pgd_populate(&init_mm, dst_pgd, dst_pud); + } + dst_pud = pud_offset(dst_pgd, start); + + src_pud = pud_offset(src_pgd, start); + do { + next = pud_addr_end(addr, end); + if (pud_none(*src_pud)) + continue; + if (pud_table(*(src_pud))) { + if (copy_pmd(dst_pud, src_pud, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pud, + __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + } + } while (dst_pud++, src_pud++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgd = pgd_offset_k(start); + + dst_pgd = pgd_offset_raw(dst_pgd, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd)) + continue; + if (copy_pud(dst_pgd, src_pgd, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return 0; +} + +/* + * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). + * + * Memory allocated by get_safe_page() will be dealt with by the hibernate code, + * we don't need to free it here. + */ +int swsusp_arch_resume(void) +{ + int rc = 0; + void *zero_page; + size_t exit_size; + pgd_t *tmp_pg_dir; + void *lm_restore_pblist; + phys_addr_t phys_hibernate_exit; + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, + void *, phys_addr_t, phys_addr_t); + + /* + * Locate the exit code in the bottom-but-one page, so that *NULL + * still has disastrous affects. + */ + hibernate_exit = (void *)PAGE_SIZE; + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; + /* + * Copy swsusp_arch_suspend_exit() to a safe page. This will generate + * a new set of ttbr0 page tables and load them. + */ + rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, + (unsigned long)hibernate_exit, + &phys_hibernate_exit, + (void *)get_safe_page, GFP_ATOMIC); + if (rc) { + pr_err("Failed to create safe executable page for hibernate_exit code."); + goto out; + } + + /* + * The hibernate exit text contains a set of el2 vectors, that will + * be executed at el2 with the mmu off in order to reload hyp-stub. + */ + __flush_dcache_area(hibernate_exit, exit_size); + + /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * KASLR will cause the el2 vectors to be in a different location in + * the resumed kernel. Load hibernate's temporary copy into el2. + * + * We can skip this step if we booted at EL1, or are running with VHE. + */ + if (el2_reset_needed()) { + phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + el2_vectors += hibernate_el2_vectors - + __hibernate_exit_text_start; /* offset */ + + __hyp_set_vectors(el2_vectors); + } + + /* + * We need a zero page that is zero before & after resume in order to + * to break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + + hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, + resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); + +out: + return rc; +} + +static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + if (action == PM_HIBERNATION_PREPARE && + cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return notifier_from_errno(-ENODEV); + } + + return NOTIFY_OK; +} + +static int __init check_boot_cpu_online_init(void) +{ + /* + * Set this pm_notifier callback with a lower priority than + * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be + * called earlier to disable cpu hotplug before the cpu online check. + */ + pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); + + return 0; +} +core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b45c95d34b83..ce21aa88263f 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (!bp->overflow_handler) + if (is_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ - if (!wp->overflow_handler) + if (is_default_overflow_handler(wp)) step = 1; unlock: @@ -886,9 +886,11 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - int cpu = (long)hcpu; - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) { + local_irq_disable(); + hw_breakpoint_reset(NULL); + local_irq_enable(); + } return NOTIFY_OK; } diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index a272f335c289..8727f4490772 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -22,6 +22,8 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/ptrace.h> #include <asm/virt.h> @@ -53,15 +55,26 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x1, esr_el2 - lsr x1, x1, #26 - cmp x1, #0x16 - b.ne 2f // Not an HVC trap - cbz x0, 1f - msr vbar_el2, x0 // Set vbar_el2 - b 2f -1: mrs x0, vbar_el2 // Return vbar_el2 -2: eret + mrs x30, esr_el2 + lsr x30, x30, #ESR_ELx_EC_SHIFT + + cmp x30, #ESR_ELx_EC_HVC64 + b.ne 9f // Not an HVC trap + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 9f + +1: cmp x0, #HVC_SET_VECTORS + b.ne 2f + msr vbar_el2, x1 + b 9f + + /* Someone called kvm_call_hyp() against the hyp-stub... */ +2: mov x0, #ARM_EXCEPTION_HYP_GONE + +9: eret ENDPROC(el1_sync) .macro invalid_vector label @@ -101,10 +114,18 @@ ENDPROC(\label) */ ENTRY(__hyp_get_vectors) - mov x0, xzr - // fall through -ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x0, #HVC_GET_VECTORS hvc #0 + ldr lr, [sp], #16 ret ENDPROC(__hyp_get_vectors) + +ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x1, x0 + mov x0, #HVC_SET_VECTORS + hvc #0 + ldr lr, [sp], #16 + ret ENDPROC(__hyp_set_vectors) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 5e360ce88f10..c7fcb232fe47 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -73,6 +73,8 @@ #ifdef CONFIG_EFI +__efistub_stext_offset = stext - _text; + /* * Prevent the symbol aliases below from being emitted into the kallsyms * table, by forcing them to be absolute symbols (which are conveniently @@ -112,6 +114,7 @@ __efistub___memset = KALLSYMS_HIDE(__pi_memset); __efistub__text = KALLSYMS_HIDE(_text); __efistub__end = KALLSYMS_HIDE(_end); __efistub__edata = KALLSYMS_HIDE(_edata); +__efistub_screen_info = KALLSYMS_HIDE(screen_info); #endif diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7371455160e5..368c08290dd8 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) - page = virt_to_page(addr); + page = pfn_to_page(PHYS_PFN(__pa(addr))); else return addr; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 582983920054..b05469173ba5 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -74,7 +74,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys) +u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) { void *fdt; u64 seed, offset, mask, module_range; @@ -132,8 +132,8 @@ u64 __init kaslr_early_init(u64 dt_phys) * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this * happens, increase the KASLR offset by the size of the kernel image. */ - if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) offset = (offset + (u64)(_end - _text)) & mask; if (IS_ENABLED(CONFIG_KASAN)) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index c72de668e1d4..3c4e308b40a0 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -74,6 +74,16 @@ int raw_pci_write(unsigned int domain, unsigned int bus, return -ENXIO; } +#ifdef CONFIG_NUMA + +int pcibus_to_node(struct pci_bus *bus) +{ + return dev_to_node(&bus->dev); +} +EXPORT_SYMBOL(pcibus_to_node); + +#endif + #ifdef CONFIG_ACPI /* Root bridge scanning */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index ff4665462a02..32c3c6e70119 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry, tail = (struct frame_tail __user *)regs->regs[29]; - while (entry->nr < PERF_MAX_STACK_DEPTH && + while (entry->nr < sysctl_perf_event_max_stack && tail && !((unsigned long)tail & 0xf)) tail = user_backtrace(tail, entry); } else { @@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry, tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - while ((entry->nr < PERF_MAX_STACK_DEPTH) && + while ((entry->nr < sysctl_perf_event_max_stack) && tail && !((unsigned long)tail & 0x3)) tail = compat_user_backtrace(tail, entry); #endif diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f419a7c075a4..838ccf123307 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -21,6 +21,7 @@ #include <asm/irq_regs.h> #include <asm/perf_event.h> +#include <asm/sysreg.h> #include <asm/virt.h> #include <linux/of.h> @@ -33,43 +34,43 @@ */ /* Required events. */ -#define ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS 0x04 -#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED 0x12 +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 /* At least one of the following is required. */ -#define ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED 0x08 -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x1B +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B /* Common architectural events. */ -#define ARMV8_PMUV3_PERFCTR_MEM_READ 0x06 -#define ARMV8_PMUV3_PERFCTR_MEM_WRITE 0x07 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07 #define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 -#define ARMV8_PMUV3_PERFCTR_EXC_EXECUTED 0x0A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE 0x0B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE 0x0C -#define ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH 0x0D -#define ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN 0x0E -#define ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE 0x1C +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C #define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E #define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 /* Common microarchitectural events. */ -#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL 0x01 -#define ARMV8_PMUV3_PERFCTR_ITLB_REFILL 0x02 -#define ARMV8_PMUV3_PERFCTR_DTLB_REFILL 0x05 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05 #define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 -#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS 0x14 -#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB 0x15 -#define ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS 0x16 -#define ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL 0x17 -#define ARMV8_PMUV3_PERFCTR_L2_CACHE_WB 0x18 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18 #define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 -#define ARMV8_PMUV3_PERFCTR_MEM_ERROR 0x1A +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A #define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D #define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F #define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 @@ -85,89 +86,182 @@ #define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B #define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C #define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D -#define ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL 0x2E +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E #define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F -#define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30 - -/* ARMv8 implementation defined event types. */ -#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40 -#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41 -#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42 -#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43 -#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C -#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D -#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E -#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30 + +/* ARMv8 recommended implementation defined event types */ +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F + +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65 + +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8 /* ARMv8 Cortex-A53 specific event types. */ -#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2 +#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 /* ARMv8 Cavium ThunderX specific event types. */ -#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9 -#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA -#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB -#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC -#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9 +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB +#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC +#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; /* ARM Cortex-A53 HW events mapping. */ static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, }; /* ARM Cortex-A57 and Cortex-A72 events mapping. */ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, }; static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; +/* Broadcom Vulcan events mapping */ +static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_BR_RETIRED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, +}; + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -175,21 +269,21 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL, + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -197,23 +291,23 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST, + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -221,67 +315,108 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, - [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS, - [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS, - - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, +}; + +static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; +static ssize_t +armv8pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%03llx\n", pmu_attr->id); +} + #define ARMV8_EVENT_ATTR_RESOLVE(m) #m #define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR_STRING(name, armv8_event_attr_##name, \ - "event=" ARMV8_EVENT_ATTR_RESOLVE(config)) - -ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR); -ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL); -ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_ITLB_REFILL); -ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL); -ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS); -ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_DTLB_REFILL); -ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_MEM_READ); -ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_MEM_WRITE); -ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED); + PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ + config, armv8pmu_events_sysfs_show) + +ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR); +ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL); +ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL); +ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL); +ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE); +ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL); +ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED); +ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED); +ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED); ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN); -ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_EXECUTED); -ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE); -ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE); -ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH); -ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN); -ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS); -ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED); -ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES); -ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED); +ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN); +ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED); +ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED); +ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED); +ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED); +ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED); +ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED); +ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES); +ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED); ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS); -ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS); -ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB); -ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS); -ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL); -ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2_CACHE_WB); +ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE); +ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB); +ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE); +ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL); +ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB); ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS); -ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEM_ERROR); -ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC); -ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE); +ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR); +ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC); +ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED); ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES); -ARMV8_EVENT_ATTR(chain, ARMV8_PMUV3_PERFCTR_CHAIN); +/* Don't expose the chain event in /sys, since it's useless in isolation */ ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE); ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE); ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED); @@ -297,9 +432,9 @@ ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL); ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE); ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB); ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL); -ARMV8_EVENT_ATTR(l21_tlb_refill, ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL); +ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); -ARMV8_EVENT_ATTR(l21_tlb, ARMV8_PMUV3_PERFCTR_L21_TLB); +ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_sw_incr.attr.attr, @@ -332,7 +467,6 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_inst_spec.attr.attr, &armv8_event_attr_ttbr_write_retired.attr.attr, &armv8_event_attr_bus_cycles.attr.attr, - &armv8_event_attr_chain.attr.attr, &armv8_event_attr_l1d_cache_allocate.attr.attr, &armv8_event_attr_l2d_cache_allocate.attr.attr, &armv8_event_attr_br_retired.attr.attr, @@ -348,15 +482,33 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_l3d_cache.attr.attr, &armv8_event_attr_l3d_cache_wb.attr.attr, &armv8_event_attr_l2d_tlb_refill.attr.attr, - &armv8_event_attr_l21_tlb_refill.attr.attr, + &armv8_event_attr_l2i_tlb_refill.attr.attr, &armv8_event_attr_l2d_tlb.attr.attr, - &armv8_event_attr_l21_tlb.attr.attr, + &armv8_event_attr_l2i_tlb.attr.attr, NULL, }; +static umode_t +armv8pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) + return attr->mode; + + return 0; +} + static struct attribute_group armv8_pmuv3_events_attr_group = { .name = "events", .attrs = armv8_pmuv3_event_attrs, + .is_visible = armv8pmu_event_attr_is_visible, }; PMU_FORMAT_ATTR(event, "config:0-9"); @@ -397,16 +549,14 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { static inline u32 armv8pmu_pmcr_read(void) { - u32 val; - asm volatile("mrs %0, pmcr_el0" : "=r" (val)); - return val; + return read_sysreg(pmcr_el0); } static inline void armv8pmu_pmcr_write(u32 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); - asm volatile("msr pmcr_el0, %0" :: "r" (val)); + write_sysreg(val, pmcr_el0); } static inline int armv8pmu_has_overflowed(u32 pmovsr) @@ -428,7 +578,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) static inline int armv8pmu_select_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmselr_el0, %0" :: "r" (counter)); + write_sysreg(counter, pmselr_el0); isb(); return idx; @@ -445,9 +595,9 @@ static inline u32 armv8pmu_read_counter(struct perf_event *event) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("mrs %0, pmccntr_el0" : "=r" (value)); + value = read_sysreg(pmccntr_el0); else if (armv8pmu_select_counter(idx) == idx) - asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); + value = read_sysreg(pmxevcntr_el0); return value; } @@ -469,47 +619,47 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) */ u64 value64 = 0xffffffff00000000ULL | value; - asm volatile("msr pmccntr_el0, %0" :: "r" (value64)); + write_sysreg(value64, pmccntr_el0); } else if (armv8pmu_select_counter(idx) == idx) - asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); + write_sysreg(value, pmxevcntr_el0); } static inline void armv8pmu_write_evtype(int idx, u32 val) { if (armv8pmu_select_counter(idx) == idx) { val &= ARMV8_PMU_EVTYPE_MASK; - asm volatile("msr pmxevtyper_el0, %0" :: "r" (val)); + write_sysreg(val, pmxevtyper_el0); } } static inline int armv8pmu_enable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmcntenset_el0); return idx; } static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmcntenclr_el0); return idx; } static inline int armv8pmu_enable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmintenset_el1); return idx; } static inline int armv8pmu_disable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmintenclr_el1); isb(); /* Clear the overflow flag in case an interrupt is pending. */ - asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmovsclr_el0); isb(); return idx; @@ -520,11 +670,11 @@ static inline u32 armv8pmu_getreset_flags(void) u32 value; /* Read */ - asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); + value = read_sysreg(pmovsclr_el0); /* Write to clear flags */ value &= ARMV8_PMU_OVSR_MASK; - asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); + write_sysreg(value, pmovsclr_el0); return value; } @@ -685,7 +835,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ - if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) { + if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -781,22 +931,38 @@ static int armv8_thunder_map_event(struct perf_event *event) ARMV8_PMU_EVTYPE_EVENT); } -static void armv8pmu_read_num_pmnc_events(void *info) +static int armv8_vulcan_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv8_vulcan_perf_map, + &armv8_vulcan_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static void __armv8pmu_probe_pmu(void *info) { - int *nb_cnt = info; + struct arm_pmu *cpu_pmu = info; + u32 pmceid[2]; /* Read the nb of CNTx counters supported from PMNC */ - *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) + & ARMV8_PMU_PMCR_N_MASK; /* Add the CPU cycles counter */ - *nb_cnt += 1; + cpu_pmu->num_events += 1; + + pmceid[0] = read_sysreg(pmceid0_el0); + pmceid[1] = read_sysreg(pmceid1_el0); + + bitmap_from_u32array(cpu_pmu->pmceid_bitmap, + ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid, + ARRAY_SIZE(pmceid)); } -static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu) +static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) { - return smp_call_function_any(&arm_pmu->supported_cpus, - armv8pmu_read_num_pmnc_events, - &arm_pmu->num_events, 1); + return smp_call_function_any(&cpu_pmu->supported_cpus, + __armv8pmu_probe_pmu, + cpu_pmu, 1); } static void armv8_pmu_init(struct arm_pmu *cpu_pmu) @@ -819,7 +985,8 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; - return armv8pmu_probe_num_events(cpu_pmu); + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) @@ -828,7 +995,7 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) @@ -837,7 +1004,7 @@ static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) @@ -846,7 +1013,7 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) @@ -855,7 +1022,16 @@ static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); +} + +static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_brcm_vulcan"; + cpu_pmu->map_event = armv8_vulcan_map_event; + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + return armv8pmu_probe_pmu(cpu_pmu); } static const struct of_device_id armv8_pmu_of_device_ids[] = { @@ -864,6 +1040,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, + {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 80624829db61..48eea6866c67 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -265,9 +265,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (stack_start) { if (is_compat_thread(task_thread_info(p))) childregs->compat_sp = stack_start; - /* 16-byte aligned stack mandatory on AArch64 */ - else if (stack_start & 15) - return -EINVAL; else childregs->sp = stack_start; } @@ -382,13 +379,14 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } -static unsigned long randomize_base(unsigned long base) -{ - unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; - return randomize_range(base, range_end, 0) ? : base; -} - unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_base(mm->brk); + unsigned long range_end = mm->brk; + + if (is_compat_task()) + range_end += 0x02000000; + else + range_end += 0x40000000; + + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 9dc67769b6a4..3279defabaa2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -53,6 +53,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/kasan.h> +#include <asm/numa.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -175,7 +176,6 @@ static void __init smp_build_mpidr_hash(void) */ if (mpidr_hash_size() > 4 * num_possible_cpus()) pr_warn("Large number of MPIDR hash buckets detected\n"); - __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -224,69 +224,6 @@ static void __init request_standard_resources(void) } } -#ifdef CONFIG_BLK_DEV_INITRD -/* - * Relocate initrd if it is not completely within the linear mapping. - * This would be the case if mem= cuts out all or part of it. - */ -static void __init relocate_initrd(void) -{ - phys_addr_t orig_start = __virt_to_phys(initrd_start); - phys_addr_t orig_end = __virt_to_phys(initrd_end); - phys_addr_t ram_end = memblock_end_of_DRAM(); - phys_addr_t new_start; - unsigned long size, to_free = 0; - void *dest; - - if (orig_end <= ram_end) - return; - - /* - * Any of the original initrd which overlaps the linear map should - * be freed after relocating. - */ - if (orig_start < ram_end) - to_free = ram_end - orig_start; - - size = orig_end - orig_start; - if (!size) - return; - - /* initrd needs to be relocated completely inside linear mapping */ - new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), - size, PAGE_SIZE); - if (!new_start) - panic("Cannot relocate initrd of size %ld\n", size); - memblock_reserve(new_start, size); - - initrd_start = __phys_to_virt(new_start); - initrd_end = initrd_start + size; - - pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n", - orig_start, orig_start + size - 1, - new_start, new_start + size - 1); - - dest = (void *)initrd_start; - - if (to_free) { - memcpy(dest, (void *)__phys_to_virt(orig_start), to_free); - dest += to_free; - } - - copy_from_early_mem(dest, orig_start + to_free, size - to_free); - - if (to_free) { - pr_info("Freeing original RAMDISK from [%llx-%llx]\n", - orig_start, orig_start + to_free - 1); - memblock_free(orig_start, to_free); - } -} -#else -static inline void __init relocate_initrd(void) -{ -} -#endif - u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) @@ -327,7 +264,11 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); paging_init(); - relocate_initrd(); + + if (acpi_disabled) + unflatten_device_tree(); + + bootmem_init(); kasan_init(); @@ -335,12 +276,11 @@ void __init setup_arch(char **cmdline_p) early_ioremap_reset(); - if (acpi_disabled) { - unflatten_device_tree(); + if (acpi_disabled) psci_dt_init(); - } else { + else psci_acpi_init(); - } + xen_early_init(); cpu_read_bootcpu_ops(); @@ -379,6 +319,9 @@ static int __init topology_init(void) { int i; + for_each_online_node(i) + register_one_node(i); + for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index fd10eb663868..9a3aec97ac09 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -49,39 +49,32 @@ orr \dst, \dst, \mask // dst|=(aff3>>rs3) .endm /* - * Save CPU state for a suspend and execute the suspend finisher. - * On success it will return 0 through cpu_resume - ie through a CPU - * soft/hard reboot from the reset vector. - * On failure it returns the suspend finisher return value or force - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher - * is not allowed to return, if it does this must be considered failure). - * It saves callee registers, and allocates space on the kernel stack - * to save the CPU specific registers + some other data for resume. + * Save CPU state in the provided sleep_stack_data area, and publish its + * location for cpu_resume()'s use in sleep_save_stash. * - * x0 = suspend finisher argument - * x1 = suspend finisher function pointer + * cpu_resume() will restore this saved state, and return. Because the + * link-register is saved and restored, it will appear to return from this + * function. So that the caller can tell the suspend/resume paths apart, + * __cpu_suspend_enter() will always return a non-zero value, whereas the + * path through cpu_resume() will return 0. + * + * x0 = struct sleep_stack_data area */ ENTRY(__cpu_suspend_enter) - stp x29, lr, [sp, #-96]! - stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - /* - * Stash suspend finisher and its argument in x20 and x19 - */ - mov x19, x0 - mov x20, x1 + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] + + /* save the sp in cpu_suspend_ctx */ mov x2, sp - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx - mov x0, sp - /* - * x0 now points to struct cpu_suspend_ctx allocated on the stack - */ - str x2, [x0, #CPU_CTX_SP] - ldr x1, =sleep_save_sp - ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] + + /* find the mpidr_hash */ + ldr x1, =sleep_save_stash + ldr x1, [x1] mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -93,74 +86,28 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 - bl __cpu_suspend_save - /* - * Grab suspend finisher in x20 and its argument in x19 - */ - mov x0, x19 - mov x1, x20 - /* - * We are ready for power down, fire off the suspend finisher - * in x1, with argument in x0 - */ - blr x1 - /* - * Never gets here, unless suspend finisher fails. - * Successful cpu_suspend should return from cpu_resume, returning - * through this code path is considered an error - * If the return value is set to 0 force x0 = -EOPNOTSUPP - * to make sure a proper error condition is propagated - */ - cmp x0, #0 - mov x3, #-EOPNOTSUPP - csel x0, x3, x0, eq - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 + + str x0, [x1] + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS + stp x29, lr, [sp, #-16]! + bl cpu_do_suspend + ldp x29, lr, [sp], #16 + mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg -/* - * x0 must contain the sctlr value retrieved from restored context - */ - .pushsection ".idmap.text", "ax" -ENTRY(cpu_resume_mmu) - ldr x3, =cpu_resume_after_mmu - msr sctlr_el1, x0 // restore sctlr_el1 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - br x3 // global jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: -#ifdef CONFIG_KASAN - mov x0, sp - bl kasan_unpoison_remaining_stack -#endif - mov x0, #0 // return zero on success - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 - ret -ENDPROC(cpu_resume_after_mmu) - ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + /* enable the MMU early - so we can access sleep_save_stash by va */ + adr_l lr, __enable_mmu /* __cpu_setup will return here */ + ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + b __cpu_setup +ENDPROC(cpu_resume) + +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -170,20 +117,32 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS + ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - /* load physical address of identity map page table in x1 */ - adrp x1, idmap_pg_dir mov sp, x2 /* save thread_info */ and x2, x2, #~(THREAD_SIZE - 1) msr sp_el0, x2 /* - * cpu_do_resume expects x0 to contain context physical address - * pointer and x1 to contain physical address of 1:1 page tables + * cpu_do_resume expects x0 to contain context address pointer */ - bl cpu_do_resume // PC relative jump, MMU off - b cpu_resume_mmu // Resume MMU, never returns -ENDPROC(cpu_resume) + bl cpu_do_resume + +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif + + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x23, x24, [x29, #48] + ldp x25, x26, [x29, #64] + ldp x27, x28, [x29, #80] + ldp x29, lr, [x29] + mov x0, #0 + ret +ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b2d5f4ee9a1c..678e0842cb3b 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -45,6 +45,7 @@ #include <asm/cputype.h> #include <asm/cpu_ops.h> #include <asm/mmu_context.h> +#include <asm/numa.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/processor.h> @@ -75,6 +76,43 @@ enum ipi_msg_type { IPI_WAKEUP }; +#ifdef CONFIG_ARM64_VHE + +/* Whether the boot CPU is running in HYP mode or not*/ +static bool boot_cpu_hyp_mode; + +static inline void save_boot_cpu_run_el(void) +{ + boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); +} + +static inline bool is_boot_cpu_in_hyp_mode(void) +{ + return boot_cpu_hyp_mode; +} + +/* + * Verify that a secondary CPU is running the kernel at the same + * EL as that of the boot CPU. + */ +void verify_cpu_run_el(void) +{ + bool in_el2 = is_kernel_in_hyp_mode(); + bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); + + if (in_el2 ^ boot_cpu_el2) { + pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", + smp_processor_id(), + in_el2 ? 2 : 1, + boot_cpu_el2 ? 2 : 1); + cpu_panic_kernel(); + } +} + +#else +static inline void save_boot_cpu_run_el(void) {} +#endif + #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -166,6 +204,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) static void smp_store_cpu_info(unsigned int cpuid) { store_cpu_topology(cpuid); + numa_store_cpu_info(cpuid); } /* @@ -225,8 +264,6 @@ asmlinkage void secondary_start_kernel(void) pr_info("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); update_cpu_boot_status(CPU_BOOT_SUCCESS); - /* Make sure the status update is visible before we complete */ - smp_wmb(); set_cpu_online(cpu, true); complete(&cpu_running); @@ -401,6 +438,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { cpuinfo_store_boot_cpu(); + save_boot_cpu_run_el(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } @@ -595,6 +633,8 @@ static void __init of_parse_and_init_cpus(void) pr_debug("cpu logical map 0x%llx\n", hwid); cpu_logical_map(cpu_count) = hwid; + + early_map_cpu_to_node(cpu_count, of_node_to_nid(dn)); next: cpu_count++; } @@ -647,33 +687,18 @@ void __init smp_init_cpus(void) void __init smp_prepare_cpus(unsigned int max_cpus) { int err; - unsigned int cpu, ncores = num_possible_cpus(); + unsigned int cpu; init_cpu_topology(); smp_store_cpu_info(smp_processor_id()); /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; - - /* Don't bother if we're effectively UP */ - if (max_cpus <= 1) - return; - - /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. - * - * Make sure we online at most (max_cpus - 1) additional CPUs. */ - max_cpus--; for_each_possible_cpu(cpu) { - if (max_cpus == 0) - break; if (cpu == smp_processor_id()) continue; @@ -686,7 +711,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) continue; set_cpu_present(cpu, true); - max_cpus--; } } @@ -763,21 +787,11 @@ void arch_irq_work_raise(void) } #endif -static DEFINE_RAW_SPINLOCK(stop_lock); - /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ static void ipi_cpu_stop(unsigned int cpu) { - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { - raw_spin_lock(&stop_lock); - pr_crit("CPU%u: stopping\n", cpu); - dump_stack(); - raw_spin_unlock(&stop_lock); - } - set_cpu_online(cpu, false); local_irq_disable(); @@ -872,6 +886,9 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); + if (system_state == SYSTEM_BOOTING || + system_state == SYSTEM_RUNNING) + pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_STOP); } @@ -881,7 +898,8 @@ void smp_send_stop(void) udelay(1); if (num_online_cpus() > 1) - pr_warning("SMP: failed to stop secondary CPUs\n"); + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(cpu_online_mask)); } /* diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 66055392f445..b616e365cee3 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,30 +10,11 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long)); /* - * This is called by __cpu_suspend_enter() to save the state, and do whatever - * flushing is required to ensure that when the CPU goes to sleep we have - * the necessary data available when the caches are not searched. - * - * ptr: CPU context virtual address - * save_ptr: address of the location where the context physical address - * must be saved + * This is allocated by cpu_suspend_init(), and used to store a pointer to + * the 'struct sleep_stack_data' the contains a particular CPUs state. */ -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, - phys_addr_t *save_ptr) -{ - *save_ptr = virt_to_phys(ptr); - - cpu_do_suspend(ptr); - /* - * Only flush the context that must be retrieved with the MMU - * off. VA primitives ensure the flush is applied to all - * cache levels so context is pushed to DRAM. - */ - __flush_dcache_area(ptr, sizeof(*ptr)); - __flush_dcache_area(save_ptr, sizeof(*save_ptr)); -} +unsigned long *sleep_save_stash; /* * This hook is provided so that cpu_suspend code can restore HW @@ -51,6 +32,30 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } +void notrace __cpu_suspend_exit(void) +{ + /* + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. + */ + cpu_uninstall_idmap(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); +} + /* * cpu_suspend * @@ -60,8 +65,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - int ret; + int ret = 0; unsigned long flags; + struct sleep_stack_data state; /* * From this point debug exceptions are disabled to prevent @@ -77,34 +83,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ pause_graph_tracing(); - /* - * mm context saved on the stack, it will be restored when - * the cpu comes out of reset through the identity mapped - * page tables, so that the thread address space is properly - * set-up on function return. - */ - ret = __cpu_suspend_enter(arg, fn); - if (ret == 0) { - /* - * We are resuming from reset with the idmap active in TTBR0_EL1. - * We must uninstall the idmap and restore the expected MMU - * state before we can possibly return to userspace. - */ - cpu_uninstall_idmap(); + if (__cpu_suspend_enter(&state)) { + /* Call the suspend finisher */ + ret = fn(arg); /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. + * Never gets here, unless the suspend finisher fails. + * Successful cpu_suspend() should return from cpu_resume(), + * returning through this code path is considered an error + * If the return value is set to 0 force ret = -EOPNOTSUPP + * to make sure a proper error condition is propagated */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - - /* - * Restore HW breakpoint registers to sane values - * before debug exceptions are possibly reenabled - * through local_dbg_restore. - */ - if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + if (!ret) + ret = -EOPNOTSUPP; + } else { + __cpu_suspend_exit(); } unpause_graph_tracing(); @@ -119,22 +112,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -struct sleep_save_sp sleep_save_sp; - static int __init cpu_suspend_init(void) { - void *ctx_ptr; - /* ctx_ptr is an array of physical addresses */ - ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash), + GFP_KERNEL); - if (WARN_ON(!ctx_ptr)) + if (WARN_ON(!sleep_save_stash)) return -ENOMEM; - sleep_save_sp.save_ptr_stash = ctx_ptr; - sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); - __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); - return 0; } early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 75151aaf1a52..26fe8ea93ea2 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -25,6 +25,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/syscalls.h> +#include <asm/cpufeature.h> asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -36,11 +37,20 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } +SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) +{ + if (personality(personality) == PER_LINUX32 && + !system_supports_32bit_el0()) + return -EINVAL; + return sys_personality(personality); +} + /* * Wrappers to pass the pt_regs argument. */ asmlinkage long sys_rt_sigreturn_wrapper(void); #define sys_rt_sigreturn sys_rt_sigreturn_wrapper +#define sys_personality sys_arm64_personality #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 97bc68f4c689..64fc030be0f2 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -131,11 +131,11 @@ static int __init vdso_init(void) return -ENOMEM; /* Grab the vDSO data page. */ - vdso_pagelist[0] = virt_to_page(vdso_data); + vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data))); /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); /* Populate the special mapping structures */ vdso_spec[0] = (struct vm_special_mapping) { diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5a1939a74ff3..435e820e898d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -46,6 +46,16 @@ jiffies = jiffies_64; *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; +#ifdef CONFIG_HIBERNATION +#define HIBERNATE_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + *(.hibernate_exit.text) \ + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; +#else +#define HIBERNATE_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -63,14 +73,19 @@ PECOFF_FILE_ALIGNMENT = 0x200; #endif #if defined(CONFIG_DEBUG_ALIGN_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); -#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO -#elif defined(CONFIG_DEBUG_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT); -#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +/* + * 4 KB granule: 1 level 2 entry + * 16 KB granule: 128 level 3 entries, with contiguous bit + * 64 KB granule: 32 level 3 entries, with contiguous bit + */ +#define SEGMENT_ALIGN SZ_2M #else -#define ALIGN_DEBUG_RO -#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); +/* + * 4 KB granule: 16 level 3 entries, with contiguous bit + * 16 KB granule: 4 level 3 entries, without contiguous bit + * 64 KB granule: 1 level 3 entry + */ +#define SEGMENT_ALIGN SZ_64K #endif SECTIONS @@ -96,7 +111,6 @@ SECTIONS _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -109,18 +123,19 @@ SECTIONS LOCK_TEXT HYPERVISOR_TEXT IDMAP_TEXT + HIBERNATE_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + . = ALIGN(SEGMENT_ALIGN); RO_DATA(PAGE_SIZE) /* everything from this point to */ EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ NOTES - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + . = ALIGN(SEGMENT_ALIGN); _etext = .; /* End of text and rodata section */ __init_begin = .; @@ -154,12 +169,9 @@ SECTIONS *(.altinstr_replacement) } .rela : ALIGN(8) { - __reloc_start = .; *(.rela .rela*) - __reloc_end = .; } .dynsym : ALIGN(8) { - __dynsym_start = .; *(.dynsym) } .dynstr : { @@ -169,7 +181,11 @@ SECTIONS *(.hash) } - . = ALIGN(PAGE_SIZE); + __rela_offset = ADDR(.rela) - KIMAGE_VADDR; + __rela_size = SIZEOF(.rela); + __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR; + + . = ALIGN(SEGMENT_ALIGN); __init_end = .; _data = .; @@ -201,6 +217,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "HYP init code too big or misaligned") ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") +#ifdef CONFIG_HIBERNATION +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) + <= SZ_4K, "Hibernate exit text too big or misaligned") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index eba89e42f0ed..3246c4aba5b1 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -186,6 +186,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, exit_handler = kvm_get_exit_handler(vcpu); return exit_handler(vcpu, run); + case ARM_EXCEPTION_HYP_GONE: + /* + * EL2 has been reset to the hyp-stub. This happens when a guest + * is pre-empted by kvm_reboot()'s shutdown call. + */ + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + return 0; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 7d8747c6427c..a873a6d8be90 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -21,6 +21,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> #include <asm/pgtable-hwdef.h> +#include <asm/sysreg.h> .text .pushsection .hyp.idmap.text, "ax" @@ -103,8 +104,8 @@ __do_hyp_init: dsb sy mrs x4, sctlr_el2 - and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 - ldr x5, =SCTLR_EL2_FLAGS + and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2 + ldr x5, =SCTLR_ELx_FLAGS orr x4, x4, x5 msr sctlr_el2, x4 isb @@ -138,6 +139,49 @@ merged: eret ENDPROC(__kvm_hyp_init) + /* + * Reset kvm back to the hyp stub. This is the trampoline dance in + * reverse. If kvm used an extended idmap, __extended_idmap_trampoline + * calls this code directly in the idmap. In this case switching to the + * boot tables is a no-op. + * + * x0: HYP boot pgd + * x1: HYP phys_idmap_start + */ +ENTRY(__kvm_hyp_reset) + /* We're in trampoline code in VA, switch back to boot page tables */ + msr ttbr0_el2, x0 + isb + + /* Ensure the PA branch doesn't find a stale tlb entry or stale code. */ + ic iallu + tlbi alle2 + dsb sy + isb + + /* Branch into PA space */ + adr x0, 1f + bfi x1, x0, #0, #PAGE_SHIFT + br x1 + + /* We're now in idmap, disable MMU */ +1: mrs x0, sctlr_el2 + ldr x1, =SCTLR_ELx_FLAGS + bic x0, x0, x1 // Clear SCTL_M and etc + msr sctlr_el2, x0 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Install stub vectors */ + adr_l x0, __hyp_stub_vectors + msr vbar_el2, x0 + + eret +ENDPROC(__kvm_hyp_reset) + .ltorg .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 48f19a37b3df..7ce931565151 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -35,16 +35,21 @@ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in x0. * - * A function pointer with a value of 0 has a special meaning, and is - * used to implement __hyp_get_vectors in the same way as in + * A function pointer with a value less than 0xfff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in * arch/arm64/kernel/hyp_stub.S. + * HVC behaves as a 'bl' call and will clobber lr. */ ENTRY(__kvm_call_hyp) -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + str lr, [sp, #-16]! hvc #0 + ldr lr, [sp], #16 ret alternative_else b __vhe_hyp_call nop + nop + nop alternative_endif ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index ce9e5e5f28cf..70254a65bd5b 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -164,3 +164,22 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) + +/* + * When using the extended idmap, we don't have a trampoline page we can use + * while we switch pages tables during __kvm_hyp_reset. Accessing the idmap + * directly would be ideal, but if we're using the extended idmap then the + * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by + * kvm_call_hyp using kern_hyp_va. + * + * x0: HYP boot pgd + * x1: HYP phys_idmap_start + */ +ENTRY(__extended_idmap_trampoline) + mov x4, x1 + adr_l x3, __kvm_hyp_reset + + /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ + bfi x4, x3, #0, #PAGE_SHIFT + br x4 +ENDPROC(__extended_idmap_trampoline) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 3488894397ff..2d87f36d5cb4 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -42,19 +42,17 @@ * Shuffle the parameters before calling the function * pointed to in x0. Assumes parameters in x[1,2,3]. */ - sub sp, sp, #16 - str lr, [sp] mov lr, x0 mov x0, x1 mov x1, x2 mov x2, x3 blr lr - ldr lr, [sp] - add sp, sp, #16 .endm ENTRY(__vhe_hyp_call) + str lr, [sp, #-16]! do_el2_call + ldr lr, [sp], #16 /* * We used to rely on having an exception return to get * an implicit isb. In the E2H case, we don't have it anymore. @@ -84,8 +82,8 @@ alternative_endif /* Here, we're pretty sure the host called HVC. */ restore_x0_to_x3 - /* Check for __hyp_get_vectors */ - cbnz x0, 1f + cmp x0, #HVC_GET_VECTORS + b.ne 1f mrs x0, vbar_el2 b 2f diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 9677bf069bcc..b1ad730e1567 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -29,7 +29,9 @@ #include <asm/cputype.h> #include <asm/ptrace.h> #include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> /* * ARMv8 Reset Values @@ -130,3 +132,31 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset timer */ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } + +extern char __hyp_idmap_text_start[]; + +unsigned long kvm_hyp_reset_entry(void) +{ + if (!__kvm_cpu_uses_extended_idmap()) { + unsigned long offset; + + /* + * Find the address of __kvm_hyp_reset() in the trampoline page. + * This is present in the running page tables, and the boot page + * tables, so we call the code here to start the trampoline + * dance in reverse. + */ + offset = (unsigned long)__kvm_hyp_reset + - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK); + + return TRAMPOLINE_VA + offset; + } else { + /* + * KVM is running with merged page tables, which don't have the + * trampoline page mapped. We know the idmap is still mapped, + * but can't be called into directly. Use + * __extended_idmap_trampoline to do the call. + */ + return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline); + } +} diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 57f57fde5722..54bb209cae8e 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,6 +4,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_KASAN) += kasan_init.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 6df07069a025..50ff9ba3a236 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -24,8 +24,6 @@ #include <asm/cpufeature.h> #include <asm/alternative.h> -#include "proc-macros.S" - /* * flush_icache_range(start,end) * diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index c90c3c5f46af..b7b397802088 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -75,8 +75,7 @@ void verify_cpu_asid_bits(void) */ pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n", smp_processor_id(), asid, asid_bits); - update_cpu_boot_status(CPU_PANIC_KERNEL); - cpu_park_loop(); + cpu_panic_kernel(); } } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a6e757cbab77..fd8b9426f140 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -804,57 +804,24 @@ struct iommu_dma_notifier_data { static LIST_HEAD(iommu_dma_masters); static DEFINE_MUTEX(iommu_dma_notifier_lock); -/* - * Temporarily "borrow" a domain feature flag to to tell if we had to resort - * to creating our own domain here, in case we need to clean it up again. - */ -#define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31) - static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, u64 dma_base, u64 size) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); /* - * Best case: The device is either part of a group which was - * already attached to a domain in a previous call, or it's - * been put in a default DMA domain by the IOMMU core. + * If the IOMMU driver has the DMA domain support that we require, + * then the IOMMU core will have already configured a group for this + * device, and allocated the default domain for that group. */ - if (!domain) { - /* - * Urgh. The IOMMU core isn't going to do default domains - * for non-PCI devices anyway, until it has some means of - * abstracting the entirely implementation-specific - * sideband data/SoC topology/unicorn dust that may or - * may not differentiate upstream masters. - * So until then, HORRIBLE HACKS! - */ - domain = ops->domain_alloc(IOMMU_DOMAIN_DMA); - if (!domain) - goto out_no_domain; - - domain->ops = ops; - domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT; - - if (iommu_attach_device(domain, dev)) - goto out_put_domain; + if (!domain || iommu_dma_init_domain(domain, dma_base, size)) { + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + return false; } - if (iommu_dma_init_domain(domain, dma_base, size)) - goto out_detach; - dev->archdata.dma_ops = &iommu_dma_ops; return true; - -out_detach: - iommu_detach_device(domain, dev); -out_put_domain: - if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) - iommu_domain_free(domain); -out_no_domain: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); - return false; } static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, @@ -933,6 +900,10 @@ static int __init __iommu_dma_init(void) ret = register_iommu_dma_ops_notifier(&platform_bus_type); if (!ret) ret = register_iommu_dma_ops_notifier(&amba_bustype); +#ifdef CONFIG_PCI + if (!ret) + ret = register_iommu_dma_ops_notifier(&pci_bus_type); +#endif /* handle devices queued before this arch_initcall */ if (!ret) @@ -967,11 +938,8 @@ void arch_teardown_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - if (domain) { + if (WARN_ON(domain)) iommu_detach_device(domain, dev); - if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) - iommu_domain_free(domain); - } dev->archdata.dma_ops = NULL; } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f9271cb2f5e3..8404190fe2bd 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -23,6 +23,7 @@ #include <linux/seq_file.h> #include <asm/fixmap.h> +#include <asm/kasan.h> #include <asm/memory.h> #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> @@ -32,37 +33,25 @@ struct addr_marker { const char *name; }; -enum address_markers_idx { - MODULES_START_NR = 0, - MODULES_END_NR, - VMALLOC_START_NR, - VMALLOC_END_NR, -#ifdef CONFIG_SPARSEMEM_VMEMMAP - VMEMMAP_START_NR, - VMEMMAP_END_NR, +static const struct addr_marker address_markers[] = { +#ifdef CONFIG_KASAN + { KASAN_SHADOW_START, "Kasan shadow start" }, + { KASAN_SHADOW_END, "Kasan shadow end" }, #endif - FIXADDR_START_NR, - FIXADDR_END_NR, - PCI_START_NR, - PCI_END_NR, - KERNEL_SPACE_NR, -}; - -static struct addr_marker address_markers[] = { - { MODULES_VADDR, "Modules start" }, - { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { VMALLOC_START, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { PCI_IO_START, "PCI I/O start" }, + { PCI_IO_END, "PCI I/O end" }, #ifdef CONFIG_SPARSEMEM_VMEMMAP - { 0, "vmemmap start" }, - { 0, "vmemmap end" }, + { VMEMMAP_START, "vmemmap start" }, + { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { FIXADDR_START, "Fixmap start" }, - { FIXADDR_TOP, "Fixmap end" }, - { PCI_IO_START, "PCI I/O start" }, - { PCI_IO_END, "PCI I/O end" }, - { PAGE_OFFSET, "Linear Mapping" }, - { -1, NULL }, + { PAGE_OFFSET, "Linear Mapping" }, + { -1, NULL }, }; /* @@ -347,13 +336,6 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; -#ifdef CONFIG_SPARSEMEM_VMEMMAP - address_markers[VMEMMAP_START_NR].start_address = - (unsigned long)virt_to_page(PAGE_OFFSET); - address_markers[VMEMMAP_END_NR].start_address = - (unsigned long)virt_to_page(high_memory); -#endif - pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); return pe ? 0 : -ENOMEM; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 95df28bc875f..5954881a35ac 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -81,6 +81,56 @@ void show_pte(struct mm_struct *mm, unsigned long addr) printk("\n"); } +#ifdef CONFIG_ARM64_HW_AFDBM +/* + * This function sets the access flags (dirty, accessed), as well as write + * permission, and only to a more permissive setting. + * + * It needs to cope with hardware update of the accessed/dirty state by other + * agents in the system and can safely skip the __sync_icache_dcache() call as, + * like set_pte_at(), the PTE is never changed from no-exec to exec here. + * + * Returns whether or not the PTE actually changed. + */ +int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + pteval_t old_pteval; + unsigned int tmp; + + if (pte_same(*ptep, entry)) + return 0; + + /* only preserve the access flags and write permission */ + pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; + + /* + * PTE_RDONLY is cleared by default in the asm below, so set it in + * back if necessary (read-only or clean PTE). + */ + if (!pte_write(entry) || !dirty) + pte_val(entry) |= PTE_RDONLY; + + /* + * Setting the flags must be done atomically to avoid racing with the + * hardware update of the access/dirty state. + */ + asm volatile("// ptep_set_access_flags\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " and %0, %0, %3 // clear PTE_RDONLY\n" + " orr %0, %0, %4 // set flags\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) + : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + + flush_tlb_fix_spurious_fault(vma, address); + return 1; +} +#endif + /* * The kernel tried to access some page that wasn't present. */ @@ -212,10 +262,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, tsk = current; mm = tsk->mm; - /* Enable interrupts if they were enabled in the parent context. */ - if (interrupts_enabled(regs)) - local_irq_enable(); - /* * If we're in an interrupt or have no user context, we must not take * the fault. @@ -555,20 +601,33 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, { const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); struct siginfo info; + int rv; - if (!inf->fn(addr, esr, regs)) - return 1; + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); - pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", - inf->name, esr, addr); + if (!inf->fn(addr, esr, regs)) { + rv = 1; + } else { + pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, 0); + rv = 0; + } - info.si_signo = inf->sig; - info.si_errno = 0; - info.si_code = inf->code; - info.si_addr = (void __user *)addr; - arm64_notify_die("", regs, &info, 0); + if (interrupts_enabled(regs)) + trace_hardirqs_on(); - return 0; + return rv; } #ifdef CONFIG_ARM64_PAN diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ea989d83ea9b..d45f8627012c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -40,6 +40,7 @@ #include <asm/kasan.h> #include <asm/kernel-pgtable.h> #include <asm/memory.h> +#include <asm/numa.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -86,6 +87,21 @@ static phys_addr_t __init max_zone_dma_phys(void) return min(offset + (1ULL << 32), memblock_end_of_DRAM()); } +#ifdef CONFIG_NUMA + +static void __init zone_sizes_init(unsigned long min, unsigned long max) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; + + if (IS_ENABLED(CONFIG_ZONE_DMA)) + max_zone_pfns[ZONE_DMA] = PFN_DOWN(max_zone_dma_phys()); + max_zone_pfns[ZONE_NORMAL] = max; + + free_area_init_nodes(max_zone_pfns); +} + +#else + static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; @@ -126,6 +142,8 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) free_area_init_node(0, zone_size, min, zhole_size); } +#endif /* CONFIG_NUMA */ + #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { @@ -142,10 +160,15 @@ static void __init arm64_memory_present(void) static void __init arm64_memory_present(void) { struct memblock_region *reg; + int nid = 0; - for_each_memblock(memory, reg) - memory_present(0, memblock_region_memory_base_pfn(reg), - memblock_region_memory_end_pfn(reg)); + for_each_memblock(memory, reg) { +#ifdef CONFIG_NUMA + nid = reg->nid; +#endif + memory_present(nid, memblock_region_memory_base_pfn(reg), + memblock_region_memory_end_pfn(reg)); + } } #endif @@ -190,8 +213,12 @@ void __init arm64_memblock_init(void) */ memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)), ULLONG_MAX); - if (memblock_end_of_DRAM() > linear_region_size) - memblock_remove(0, memblock_end_of_DRAM() - linear_region_size); + if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) { + /* ensure that memstart_addr remains sufficiently aligned */ + memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size, + ARM64_MEMSTART_ALIGN); + memblock_remove(0, memstart_addr); + } /* * Apply the memory limit if it was set. Since the kernel may be loaded @@ -203,6 +230,35 @@ void __init arm64_memblock_init(void) memblock_add(__pa(_text), (u64)(_end - _text)); } + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) { + /* + * Add back the memory we just removed if it results in the + * initrd to become inaccessible via the linear mapping. + * Otherwise, this is a no-op + */ + u64 base = initrd_start & PAGE_MASK; + u64 size = PAGE_ALIGN(initrd_end) - base; + + /* + * We can only add back the initrd memory if we don't end up + * with more memory than we can address via the linear mapping. + * It is up to the bootloader to position the kernel and the + * initrd reasonably close to each other (i.e., within 32 GB of + * each other) so that all granule/#levels combinations can + * always access both. + */ + if (WARN(base < memblock_start_of_DRAM() || + base + size > memblock_start_of_DRAM() + + linear_region_size, + "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) { + initrd_start = 0; + } else { + memblock_remove(base, size); /* clear MEMBLOCK_ flags */ + memblock_add(base, size); + memblock_reserve(base, size); + } + } + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; u64 range = linear_region_size - @@ -245,7 +301,6 @@ void __init arm64_memblock_init(void) dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); - memblock_dump_all(); } void __init bootmem_init(void) @@ -257,6 +312,9 @@ void __init bootmem_init(void) early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT); + max_pfn = max_low_pfn = max; + + arm64_numa_init(); /* * Sparsemem tries to allocate bootmem in memory_present(), so must be * done after the fixed reservations. @@ -267,7 +325,7 @@ void __init bootmem_init(void) zone_sizes_init(min, max); high_memory = __va((max << PAGE_SHIFT) - 1) + 1; - max_pfn = max_low_pfn = max; + memblock_dump_all(); } #ifndef CONFIG_SPARSEMEM_VMEMMAP @@ -371,26 +429,27 @@ void __init mem_init(void) MLM(MODULES_VADDR, MODULES_END)); pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); - pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_text, __start_rodata), - MLK_ROUNDUP(__start_rodata, _etext), - MLK_ROUNDUP(__init_begin, __init_end), + pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", + MLK_ROUNDUP(_text, __start_rodata)); + pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", + MLK_ROUNDUP(__start_rodata, _etext)); + pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", + MLK_ROUNDUP(__init_begin, __init_end)); + pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_sdata, _edata)); -#ifdef CONFIG_SPARSEMEM_VMEMMAP - pr_cont(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" - " 0x%16lx - 0x%16lx (%6ld MB actual)\n", - MLG(VMEMMAP_START, - VMEMMAP_START + VMEMMAP_SIZE), - MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), - (unsigned long)virt_to_page(high_memory))); -#endif + pr_cont(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", + MLK_ROUNDUP(__bss_start, __bss_stop)); pr_cont(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", MLK(FIXADDR_START, FIXADDR_TOP)); pr_cont(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(PCI_IO_START, PCI_IO_END)); +#ifdef CONFIG_SPARSEMEM_VMEMMAP + pr_cont(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", + MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); + pr_cont(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", + MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), + (unsigned long)virt_to_page(high_memory))); +#endif pr_cont(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(__phys_to_virt(memblock_start_of_DRAM()), (unsigned long)high_memory)); @@ -407,6 +466,12 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); #endif + /* + * Make sure we chose the upper bound of sizeof(struct page) + * correctly. + */ + BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT)); + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* @@ -419,7 +484,8 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)), + 0, "unused kernel"); fixup_init(); } diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index ef47d99b5cbc..71fe98985455 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,3 +1,2 @@ -extern void __init bootmem_init(void); void fixup_init(void); diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 232f787a088a..01c171723bb3 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -95,8 +95,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } -EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); - /* * You really shouldn't be using read() or write() on /dev/mem. This might go diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f3e5c74233f3..0f85a46c3e18 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -385,7 +385,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { - unsigned long kernel_start = __pa(_stext); + unsigned long kernel_start = __pa(_text); unsigned long kernel_end = __pa(_etext); /* @@ -417,7 +417,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end early_pgtable_alloc); /* - * Map the linear alias of the [_stext, _etext) interval as + * Map the linear alias of the [_text, _etext) interval as * read-only/non-executable. This makes the contents of the * region accessible to subsystems such as hibernate, but * protects it from inadvertent modification or execution. @@ -449,8 +449,8 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)__start_rodata - (unsigned long)_stext; - create_mapping_late(__pa(_stext), (unsigned long)_stext, + section_size = (unsigned long)__start_rodata - (unsigned long)_text; + create_mapping_late(__pa(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use _etext rather than __end_rodata to @@ -471,8 +471,8 @@ void fixup_init(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, - pgprot_t prot, struct vm_struct *vma) +static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, + pgprot_t prot, struct vm_struct *vma) { phys_addr_t pa_start = __pa(va_start); unsigned long size = va_end - va_start; @@ -499,11 +499,11 @@ static void __init map_kernel(pgd_t *pgd) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; - map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); - map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, - &vmlinux_init); - map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, + &vmlinux_init); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* @@ -564,8 +564,6 @@ void __init paging_init(void) */ memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE, SWAPPER_DIR_SIZE - PAGE_SIZE); - - bootmem_init(); } /* diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c new file mode 100644 index 000000000000..98dc1047f2a2 --- /dev/null +++ b/arch/arm64/mm/numa.c @@ -0,0 +1,396 @@ +/* + * NUMA support, based on the x86 implementation. + * + * Copyright (C) 2015 Cavium Inc. + * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <linux/module.h> +#include <linux/of.h> + +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); +nodemask_t numa_nodes_parsed __initdata; +static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; + +static int numa_distance_cnt; +static u8 *numa_distance; +static int numa_off; + +static __init int numa_parse_early_param(char *opt) +{ + if (!opt) + return -EINVAL; + if (!strncmp(opt, "off", 3)) { + pr_info("%s\n", "NUMA turned off"); + numa_off = 1; + } + return 0; +} +early_param("numa", numa_parse_early_param); + +cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; +EXPORT_SYMBOL(node_to_cpumask_map); + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +const struct cpumask *cpumask_of_node(int node) +{ + if (WARN_ON(node >= nr_node_ids)) + return cpu_none_mask; + + if (WARN_ON(node_to_cpumask_map[node] == NULL)) + return cpu_online_mask; + + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(cpumask_of_node); + +#endif + +static void map_cpu_to_node(unsigned int cpu, int nid) +{ + set_cpu_numa_node(cpu, nid); + if (nid >= 0) + cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); +} + +void numa_clear_node(unsigned int cpu) +{ + int nid = cpu_to_node(cpu); + + if (nid >= 0) + cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]); + set_cpu_numa_node(cpu, NUMA_NO_NODE); +} + +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: cpumask_of_node() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) + */ +static void __init setup_node_to_cpumask_map(void) +{ + unsigned int cpu; + int node; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) + setup_nr_node_ids(); + + /* allocate and clear the mapping */ + for (node = 0; node < nr_node_ids; node++) { + alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); + cpumask_clear(node_to_cpumask_map[node]); + } + + for_each_possible_cpu(cpu) + set_cpu_numa_node(cpu, NUMA_NO_NODE); + + /* cpumask_of_node() will now work */ + pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids); +} + +/* + * Set the cpu to node and mem mapping + */ +void numa_store_cpu_info(unsigned int cpu) +{ + map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]); +} + +void __init early_map_cpu_to_node(unsigned int cpu, int nid) +{ + /* fallback to node 0 */ + if (nid < 0 || nid >= MAX_NUMNODES) + nid = 0; + + cpu_to_node_map[cpu] = nid; +} + +/** + * numa_add_memblk - Set node id to memblk + * @nid: NUMA node ID of the new memblk + * @start: Start address of the new memblk + * @size: Size of the new memblk + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_add_memblk(int nid, u64 start, u64 size) +{ + int ret; + + ret = memblock_set_node(start, size, &memblock.memory, nid); + if (ret < 0) { + pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", + start, (start + size - 1), nid); + return ret; + } + + node_set(nid, numa_nodes_parsed); + pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", + start, (start + size - 1), nid); + return ret; +} + +/** + * Initialize NODE_DATA for a node on the local memory + */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) +{ + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, + (end_pfn << PAGE_SHIFT) - 1); + + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; +} + +/** + * numa_free_distance + * + * The current table is freed. + */ +void __init numa_free_distance(void) +{ + size_t size; + + if (!numa_distance) + return; + + size = numa_distance_cnt * numa_distance_cnt * + sizeof(numa_distance[0]); + + memblock_free(__pa(numa_distance), size); + numa_distance_cnt = 0; + numa_distance = NULL; +} + +/** + * + * Create a new NUMA distance table. + * + */ +static int __init numa_alloc_distance(void) +{ + size_t size; + u64 phys; + int i, j; + + size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn), + size, PAGE_SIZE); + if (WARN_ON(!phys)) + return -ENOMEM; + + memblock_reserve(phys, size); + + numa_distance = __va(phys); + numa_distance_cnt = nr_node_ids; + + /* fill with the default distances */ + for (i = 0; i < numa_distance_cnt; i++) + for (j = 0; j < numa_distance_cnt; j++) + numa_distance[i * numa_distance_cnt + j] = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + + pr_debug("NUMA: Initialized distance table, cnt=%d\n", + numa_distance_cnt); + + return 0; +} + +/** + * numa_set_distance - Set inter node NUMA distance from node to node. + * @from: the 'from' node to set distance + * @to: the 'to' node to set distance + * @distance: NUMA distance + * + * Set the distance from node @from to @to to @distance. + * If distance table doesn't exist, a warning is printed. + * + * If @from or @to is higher than the highest known node or lower than zero + * or @distance doesn't make sense, the call is ignored. + * + */ +void __init numa_set_distance(int from, int to, int distance) +{ + if (!numa_distance) { + pr_warn_once("NUMA: Warning: distance table not allocated yet\n"); + return; + } + + if (from >= numa_distance_cnt || to >= numa_distance_cnt || + from < 0 || to < 0) { + pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + if ((u8)distance != distance || + (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + numa_distance[from * numa_distance_cnt + to] = distance; +} + +/** + * Return NUMA distance @from to @to + */ +int __node_distance(int from, int to) +{ + if (from >= numa_distance_cnt || to >= numa_distance_cnt) + return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; + return numa_distance[from * numa_distance_cnt + to]; +} +EXPORT_SYMBOL(__node_distance); + +static int __init numa_register_nodes(void) +{ + int nid; + struct memblock_region *mblk; + + /* Check that valid nid is set to memblks */ + for_each_memblock(memory, mblk) + if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) { + pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + mblk->nid, mblk->base, + mblk->base + mblk->size - 1); + return -EINVAL; + } + + /* Finally register nodes. */ + for_each_node_mask(nid, numa_nodes_parsed) { + unsigned long start_pfn, end_pfn; + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + setup_node_data(nid, start_pfn, end_pfn); + node_set_online(nid); + } + + /* Setup online nodes to actual nodes*/ + node_possible_map = numa_nodes_parsed; + + return 0; +} + +static int __init numa_init(int (*init_func)(void)) +{ + int ret; + + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + numa_free_distance(); + + ret = numa_alloc_distance(); + if (ret < 0) + return ret; + + ret = init_func(); + if (ret < 0) + return ret; + + if (nodes_empty(numa_nodes_parsed)) + return -EINVAL; + + ret = numa_register_nodes(); + if (ret < 0) + return ret; + + setup_node_to_cpumask_map(); + + /* init boot processor */ + cpu_to_node_map[0] = 0; + map_cpu_to_node(0, 0); + + return 0; +} + +/** + * dummy_numa_init - Fallback dummy NUMA init + * + * Used if there's no underlying NUMA architecture, NUMA initialization + * fails, or NUMA is disabled on the command line. + * + * Must online at least one node (node 0) and add memory blocks that cover all + * allowed memory. It is unlikely that this function fails. + */ +static int __init dummy_numa_init(void) +{ + int ret; + struct memblock_region *mblk; + + pr_info("%s\n", "No NUMA configuration found"); + pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", + 0LLU, PFN_PHYS(max_pfn) - 1); + + for_each_memblock(memory, mblk) { + ret = numa_add_memblk(0, mblk->base, mblk->size); + if (!ret) + continue; + + pr_err("NUMA init failed\n"); + return ret; + } + + numa_off = 1; + return 0; +} + +/** + * arm64_numa_init - Initialize NUMA + * + * Try each configured NUMA initialization method until one succeeds. The + * last fallback is dummy single node config encomapssing whole memory. + */ +void __init arm64_numa_init(void) +{ + if (!numa_off) { + if (!numa_init(of_numa_init)) + return; + } + + numa_init(dummy_numa_init); +} diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S deleted file mode 100644 index e6a30e1268a8..000000000000 --- a/arch/arm64/mm/proc-macros.S +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Based on arch/arm/mm/proc-macros.S - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> - -/* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* - * mmid - get context id from mm pointer (mm->context.id) - */ - .macro mmid, rd, rn - ldr \rd, [\rn, #MM_CONTEXT_ID] - .endm - -/* - * dcache_line_size - get the minimum D-cache line size from the CTR register. - */ - .macro dcache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - ubfm \tmp, \tmp, #16, #19 // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * icache_line_size - get the minimum I-cache line size from the CTR register. - */ - .macro icache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - and \tmp, \tmp, #0xf // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map - */ - .macro tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48 - ldr_l \tmpreg, idmap_t0sz - bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif - .endm - -/* - * Macro to perform a data cache maintenance for the interval - * [kaddr, kaddr + size) - * - * op: operation passed to dc instruction - * domain: domain used in dsb instruciton - * kaddr: starting virtual address of the region - * size: size of the region - * Corrupts: kaddr, size, tmp1, tmp2 - */ - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 - dcache_line_size \tmp1, \tmp2 - add \size, \kaddr, \size - sub \tmp2, \tmp1, #1 - bic \kaddr, \kaddr, \tmp2 -9998: dc \op, \kaddr - add \kaddr, \kaddr, \tmp1 - cmp \kaddr, \size - b.lo 9998b - dsb \domain - .endm - -/* - * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present - */ - .macro reset_pmuserenr_el0, tmpreg - mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx \tmpreg, \tmpreg, #8, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f - msr pmuserenr_el0, xzr // Disable PMU access from EL0 -9000: - .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 543f5198005a..c4317879b938 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -23,13 +23,11 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/hwcap.h> -#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/cpufeature.h> #include <asm/alternative.h> -#include "proc-macros.S" - #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K #elif defined(CONFIG_ARM64_16K_PAGES) @@ -66,62 +64,50 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, mair_el1 - mrs x6, cpacr_el1 - mrs x7, ttbr1_el1 - mrs x8, tcr_el1 - mrs x9, vbar_el1 - mrs x10, mdscr_el1 - mrs x11, oslsr_el1 - mrs x12, sctlr_el1 + mrs x5, cpacr_el1 + mrs x6, tcr_el1 + mrs x7, vbar_el1 + mrs x8, mdscr_el1 + mrs x9, oslsr_el1 + mrs x10, sctlr_el1 stp x2, x3, [x0] - stp x4, x5, [x0, #16] - stp x6, x7, [x0, #32] - stp x8, x9, [x0, #48] - stp x10, x11, [x0, #64] - str x12, [x0, #80] + stp x4, xzr, [x0, #16] + stp x5, x6, [x0, #32] + stp x7, x8, [x0, #48] + stp x9, x10, [x0, #64] ret ENDPROC(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context * - * x0: Physical address of context pointer - * x1: ttbr0_el1 to be restored - * - * Returns: - * sctlr_el1 value in x0 + * x0: Address of context pointer */ ENTRY(cpu_do_resume) - /* - * Invalidate local tlb entries before turning on MMU - */ - tlbi vmalle1 ldp x2, x3, [x0] ldp x4, x5, [x0, #16] - ldp x6, x7, [x0, #32] - ldp x8, x9, [x0, #48] - ldp x10, x11, [x0, #64] - ldr x12, [x0, #80] + ldp x6, x8, [x0, #32] + ldp x9, x10, [x0, #48] + ldp x11, x12, [x0, #64] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 - msr mair_el1, x5 msr cpacr_el1, x6 - msr ttbr0_el1, x1 - msr ttbr1_el1, x7 - tcr_set_idmap_t0sz x8, x7 + + /* Don't change t0sz here, mask those bits when restoring */ + mrs x5, tcr_el1 + bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 + msr sctlr_el1, x12 /* * Restore oslsr_el1 by writing oslar_el1 */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - mov x0, x12 - dsb nsh // Make sure local tlb invalidation completed isb ret ENDPROC(cpu_do_resume) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a34420a5df9a..b405bbb54431 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -476,6 +476,7 @@ emit_cond_jmp: case BPF_JGE: jmp_cond = A64_COND_CS; break; + case BPF_JSET: case BPF_JNE: jmp_cond = A64_COND_NE; break; diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 105c93b00b1b..1d1212901ae7 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -1,7 +1,6 @@ #ifndef _ASM_IA64_IOMMU_H #define _ASM_IA64_IOMMU_H 1 -#define cpu_has_x2apic 0 /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index ce112472bdd6..8b23e070b844 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -49,8 +49,8 @@ __down_read (struct rw_semaphore *sem) /* * lock for writing */ -static inline void -__down_write (struct rw_semaphore *sem) +static inline long +___down_write (struct rw_semaphore *sem) { long old, new; @@ -59,10 +59,26 @@ __down_write (struct rw_semaphore *sem) new = old + RWSEM_ACTIVE_WRITE_BIAS; } while (cmpxchg_acq(&sem->count, old, new) != old); - if (old != 0) + return old; +} + +static inline void +__down_write (struct rw_semaphore *sem) +{ + if (___down_write(sem)) rwsem_down_write_failed(sem); } +static inline int +__down_write_killable (struct rw_semaphore *sem) +{ + if (___down_write(sem)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + + return 0; +} + /* * unlock after reading */ diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 300dac3702f1..bf0865cd438a 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -531,8 +531,6 @@ efi_init (void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - palo_phys = EFI_INVALID_TABLE_ADDR; if (efi_config_init(arch_tables) != 0) diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c index cf12a17dc289..f7984f44ff0f 100644 --- a/arch/m68k/bvme6000/rtc.c +++ b/arch/m68k/bvme6000/rtc.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/poll.h> #include <linux/module.h> -#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */ +#include <linux/rtc.h> /* For struct rtc_time and ioctls, etc */ #include <linux/bcd.h> #include <asm/bvme6000hw.h> diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c index 1755e2f7137d..1cdc73268188 100644 --- a/arch/m68k/mvme16x/rtc.c +++ b/arch/m68k/mvme16x/rtc.c @@ -14,7 +14,7 @@ #include <linux/fcntl.h> #include <linux/init.h> #include <linux/poll.h> -#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */ +#include <linux/rtc.h> /* For struct rtc_time and ioctls, etc */ #include <linux/bcd.h> #include <asm/mvme16xhw.h> diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index a62581815624..88fa25fae8bd 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -61,7 +61,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ " CMPT %0, #HI(0x02000000)\n" \ " BNZ 1b\n" \ : "=&d" (temp), "=&da" (result) \ - : "da" (&v->counter), "bd" (i) \ + : "da" (&v->counter), "br" (i) \ : "cc"); \ \ smp_mb(); \ diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index ac8c039b0318..f7b23d300881 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -115,7 +115,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(ip, old, new); } -/* run from kstop_machine */ int __init ftrace_dyn_arch_init(void) { return 0; diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 2478ec6d23c9..33a365f924be 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -618,6 +618,8 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) /* Check for a core internal or performance channel event. */ if (tmp) { + /* PERF_ICORE/PERF_CHAN only exist since Meta2 */ +#ifdef METAC_2_1 void *perf_addr; /* @@ -640,6 +642,7 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) if (perf_addr) metag_out32((config & 0x0f), perf_addr); +#endif /* * Now we use the high nibble as the performance event to diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c index 315633461a94..252abc12a5a3 100644 --- a/arch/metag/kernel/perf_callchain.c +++ b/arch/metag/kernel/perf_callchain.c @@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) --frame; - while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame) + while ((entry->nr < sysctl_perf_event_max_stack) && frame) frame = user_backtrace(frame, entry); } diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index c1cf9c6c3f77..5021c546ad07 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry, addr = *sp++; if (__kernel_text_address(addr)) { perf_callchain_store(entry, addr); - if (entry->nr >= PERF_MAX_STACK_DEPTH) + if (entry->nr >= sysctl_perf_event_max_stack) break; } } @@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, } do { perf_callchain_store(entry, pc); - if (entry->nr >= PERF_MAX_STACK_DEPTH) + if (entry->nr >= sysctl_perf_event_max_stack) break; pc = unwind_stack(current, &sp, pc, &ra); } while (pc); diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h index c5a62da22cd2..ce072ba0f8dd 100644 --- a/arch/nios2/include/asm/io.h +++ b/arch/nios2/include/asm/io.h @@ -50,7 +50,6 @@ static inline void iounmap(void __iomem *addr) /* Pages to physical address... */ #define page_to_phys(page) virt_to_phys(page_to_virt(page)) -#define page_to_bus(page) page_to_virt(page) /* Macros used for converting between virtual and physical mappings. */ #define phys_to_virt(vaddr) \ diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index 4b32d6fd9d98..c1683f51ad0f 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -84,7 +84,7 @@ extern struct page *mem_map; ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) #define page_to_virt(page) \ - ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) + ((void *)(((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) # define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) # define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && \ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index a213e8c9aad0..298393c3cb42 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -209,7 +209,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - unsigned long paddr = page_to_virt(pte_page(pteval)); + unsigned long paddr = (unsigned long)page_to_virt(pte_page(pteval)); flush_dcache_range(paddr, paddr + PAGE_SIZE); set_pte(ptep, pteval); diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index e613d3673034..35bcb7cd2cde 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -81,8 +81,6 @@ typedef struct page *pgtable_t; #define virt_to_page(addr) \ (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) -#define page_to_virt(page) \ - ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8cac1eb41466..55c924b65f71 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) smp_ops->give_timebase(); /* Wait until cpu puts itself in the online & active maps */ - while (!cpu_online(cpu) || !cpu_active(cpu)) + while (!cpu_online(cpu)) cpu_relax(); return 0; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index e04a6752b399..22d9015c1acc 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { fp = (unsigned long __user *) sp; if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) return; @@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { fp = (unsigned int __user *) (unsigned long) sp; if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) return; diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index fead491dfc28..c75e4471e618 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -90,7 +90,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline long ___down_write(struct rw_semaphore *sem) { signed long old, new, tmp; @@ -104,13 +104,23 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "m" (tmp) : "cc", "memory"); - if (old != 0) - rwsem_down_write_failed(sem); + + return old; } static inline void __down_write(struct rw_semaphore *sem) { - __down_write_nested(sem, 0); + if (___down_write(sem)) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + if (___down_write(sem)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + + return 0; } /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 40a6b4f9c36c..7b89a7572100 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ - while (!cpu_online(cpu) || !cpu_active(cpu)) + while (!cpu_online(cpu)) cpu_relax(); return 0; } diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c index 324599bfad14..0104c8199c48 100644 --- a/arch/sh/boards/board-sh7757lcr.c +++ b/arch/sh/boards/board-sh7757lcr.c @@ -20,7 +20,6 @@ #include <linux/mfd/tmio.h> #include <linux/mmc/host.h> #include <linux/mmc/sh_mmcif.h> -#include <linux/mmc/sh_mobile_sdhi.h> #include <linux/sh_eth.h> #include <linux/sh_intc.h> #include <linux/usb/renesas_usbhs.h> diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 62c3b81300ed..de8393cb7313 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -15,7 +15,6 @@ #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/mmc/host.h> -#include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mtd/physmap.h> #include <linux/mtd/sh_flctl.h> #include <linux/mfd/tmio.h> diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index a9c0c07386fd..6d612792f6b8 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -13,7 +13,6 @@ #include <linux/platform_device.h> #include <linux/mmc/host.h> #include <linux/mmc/sh_mmcif.h> -#include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mtd/physmap.h> #include <linux/mfd/tmio.h> #include <linux/gpio.h> diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 6bd9230e64e3..5deb2d82f19f 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -11,7 +11,6 @@ #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/mmc/host.h> -#include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mfd/tmio.h> #include <linux/mtd/physmap.h> #include <linux/mtd/onenand.h> diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 7a04da3efce4..5de60a77eaa1 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -13,7 +13,6 @@ #include <linux/input.h> #include <linux/input/sh_keysc.h> #include <linux/mmc/host.h> -#include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mtd/physmap.h> #include <linux/mfd/tmio.h> #include <linux/mtd/nand.h> diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index e0e1df136642..f1fecd395679 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -15,7 +15,6 @@ #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/mmc/host.h> -#include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mfd/tmio.h> #include <linux/mtd/physmap.h> #include <linux/delay.h> diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index a319745a7b63..751c3373a92c 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -26,6 +26,7 @@ generic-y += percpu.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h +generic-y += rwsem.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h deleted file mode 100644 index edab57265293..000000000000 --- a/arch/sh/include/asm/rwsem.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * include/asm-sh/rwsem.h: R/W semaphores for SH using the stuff - * in lib/rwsem.c. - */ - -#ifndef _ASM_SH_RWSEM_H -#define _ASM_SH_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - if (atomic_inc_return((atomic_t *)(&sem->count)) > 0) - smp_wmb(); - else - rwsem_down_read_failed(sem); -} - -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - int tmp; - - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { - smp_wmb(); - return 1; - } - } - return 0; -} - -/* - * lock for writing - */ -static inline void __down_write(struct rw_semaphore *sem) -{ - int tmp; - - tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_t *)(&sem->count)); - if (tmp == RWSEM_ACTIVE_WRITE_BIAS) - smp_wmb(); - else - rwsem_down_write_failed(sem); -} - -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - int tmp; - - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - smp_wmb(); - return tmp == RWSEM_UNLOCKED_VALUE; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - int tmp; - - smp_wmb(); - tmp = atomic_dec_return((atomic_t *)(&sem->count)); - if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - smp_wmb(); - if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_t *)(&sem->count)) < 0) - rwsem_wake(sem); -} - -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) -{ - atomic_add(delta, (atomic_t *)(&sem->count)); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - int tmp; - - smp_wmb(); - tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count)); - if (tmp < 0) - rwsem_downgrade_wake(sem); -} - -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) -{ - __down_write(sem); -} - -/* - * implement exchange and add functionality - */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) -{ - smp_mb(); - return atomic_add_return(delta, (atomic_t *)(&sem->count)); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_SH_RWSEM_H */ diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index e928618838bc..6024c26c0585 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += mm-arch-hooks.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h +generic-y += rwsem.h generic-y += serial.h generic-y += trace_clock.h generic-y += types.h diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h deleted file mode 100644 index 069bf4d663a1..000000000000 --- a/arch/sparc/include/asm/rwsem.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * rwsem.h: R/W semaphores implemented using CAS - * - * Written by David S. Miller (davem@redhat.com), 2001. - * Derived from asm-i386/rwsem.h - */ -#ifndef _SPARC64_RWSEM_H -#define _SPARC64_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#define RWSEM_UNLOCKED_VALUE 0x00000000L -#define RWSEM_ACTIVE_BIAS 0x00000001L -#define RWSEM_ACTIVE_MASK 0xffffffffL -#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - if (unlikely(atomic64_inc_return((atomic64_t *)(&sem->count)) <= 0L)) - rwsem_down_read_failed(sem); -} - -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - long tmp; - - while ((tmp = sem->count) >= 0L) { - if (tmp == cmpxchg(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { - return 1; - } - } - return 0; -} - -/* - * lock for writing - */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) -{ - long tmp; - - tmp = atomic64_add_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic64_t *)(&sem->count)); - if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) - rwsem_down_write_failed(sem); -} - -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - long tmp; - - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - return tmp == RWSEM_UNLOCKED_VALUE; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - long tmp; - - tmp = atomic64_dec_return((atomic64_t *)(&sem->count)); - if (unlikely(tmp < -1L && (tmp & RWSEM_ACTIVE_MASK) == 0L)) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - if (unlikely(atomic64_sub_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic64_t *)(&sem->count)) < 0L)) - rwsem_wake(sem); -} - -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - atomic64_add(delta, (atomic64_t *)(&sem->count)); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - long tmp; - - tmp = atomic64_add_return(-RWSEM_WAITING_BIAS, (atomic64_t *)(&sem->count)); - if (tmp < 0L) - rwsem_downgrade_wake(sem); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return atomic64_add_return(delta, (atomic64_t *)(&sem->count)); -} - -#endif /* __KERNEL__ */ - -#endif /* _SPARC64_RWSEM_H */ diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6596f66ce112..a4b8b5aed21c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, } } #endif - } while (entry->nr < PERF_MAX_STACK_DEPTH); + } while (entry->nr < sysctl_perf_event_max_stack); } static inline int @@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; perf_callchain_store(entry, pc); - } while (entry->nr < PERF_MAX_STACK_DEPTH); + } while (entry->nr < sysctl_perf_event_max_stack); } static void perf_callchain_user_32(struct perf_callchain_entry *entry, @@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, ufp = (unsigned long)sf.fp; } perf_callchain_store(entry, pc); - } while (entry->nr < PERF_MAX_STACK_DEPTH); + } while (entry->nr < sysctl_perf_event_max_stack); } void diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2dc18605831f..7bb15747fea2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -164,10 +164,6 @@ config INSTRUCTION_DECODER def_bool y depends on KPROBES || PERF_EVENTS || UPROBES -config PERF_EVENTS_INTEL_UNCORE - def_bool y - depends on PERF_EVENTS && CPU_SUP_INTEL && PCI - config OUTPUT_FORMAT string default "elf32-i386" if X86_32 @@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR def_bool y depends on X86_MCE_INTEL +source "arch/x86/events/Kconfig" + config X86_LEGACY_VM86 bool "Legacy VM86 support" default n @@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE -config PERF_EVENTS_AMD_POWER - depends on PERF_EVENTS && CPU_SUP_AMD - tristate "AMD Processor Power Reporting Mechanism" - ---help--- - Provide power reporting mechanism support for AMD processors. - Currently, it leverages X86_FEATURE_ACC_POWER - (CPUID Fn8000_0007_EDX[12]) interface to calculate the - average power consumption on Family 15h processors. - config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" ---help--- @@ -1932,54 +1921,38 @@ config RELOCATABLE (CONFIG_PHYSICAL_START) is used as the minimum location. config RANDOMIZE_BASE - bool "Randomize the address of the kernel image" + bool "Randomize the address of the kernel image (KASLR)" depends on RELOCATABLE default n ---help--- - Randomizes the physical and virtual address at which the - kernel image is decompressed, as a security feature that - deters exploit attempts relying on knowledge of the location - of kernel internals. + In support of Kernel Address Space Layout Randomization (KASLR), + this randomizes the physical address at which the kernel image + is decompressed and the virtual address where the kernel + image is mapped, as a security feature that deters exploit + attempts relying on knowledge of the location of kernel + code internals. + + The kernel physical and virtual address can be randomized + from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that + using RANDOMIZE_BASE reduces the memory space available to + kernel modules from 1.5GB to 1GB.) + + Entropy is generated using the RDRAND instruction if it is + supported. If RDTSC is supported, its value is mixed into + the entropy pool as well. If neither RDRAND nor RDTSC are + supported, then entropy is read from the i8254 timer. + + Since the kernel is built using 2GB addressing, and + PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of + entropy is theoretically possible. Currently, with the + default value for PHYSICAL_ALIGN and due to page table + layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits. + + If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot + time. To enable it, boot with "kaslr" on the kernel command + line (which will also disable hibernation). - Entropy is generated using the RDRAND instruction if it is - supported. If RDTSC is supported, it is used as well. If - neither RDRAND nor RDTSC are supported, then randomness is - read from the i8254 timer. - - The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, - and aligned according to PHYSICAL_ALIGN. Since the kernel is - built using 2GiB addressing, and PHYSICAL_ALGIN must be at a - minimum of 2MiB, only 10 bits of entropy is theoretically - possible. At best, due to page table layouts, 64-bit can use - 9 bits of entropy and 32-bit uses 8 bits. - - If unsure, say N. - -config RANDOMIZE_BASE_MAX_OFFSET - hex "Maximum kASLR offset allowed" if EXPERT - depends on RANDOMIZE_BASE - range 0x0 0x20000000 if X86_32 - default "0x20000000" if X86_32 - range 0x0 0x40000000 if X86_64 - default "0x40000000" if X86_64 - ---help--- - The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical - memory is used to determine the maximal offset in bytes that will - be applied to the kernel when kernel Address Space Layout - Randomization (kASLR) is active. This must be a multiple of - PHYSICAL_ALIGN. - - On 32-bit this is limited to 512MiB by page table layouts. The - default is 512MiB. - - On 64-bit this is limited by how the kernel fixmap page table is - positioned, so this cannot be larger than 1GiB currently. Without - RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel - and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the - modules area will shrink to compensate, up to the current maximum - 1GiB to 1GiB split. The default is 1GiB. - - If unsure, leave at the default value. + If unsure, say N. # Relocation on x86 needs some additional build support config X86_NEED_RELOCS diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4086abca0b32..6fce7f096b88 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -208,7 +208,8 @@ endif head-y := arch/x86/kernel/head_$(BITS).o head-y += arch/x86/kernel/head$(BITS).o -head-y += arch/x86/kernel/head.o +head-y += arch/x86/kernel/ebda.o +head-y += arch/x86/kernel/platform-quirks.o libs-y += arch/x86/lib/ diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b1ef9e489084..700a9c6e6159 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -86,16 +86,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p' - -quiet_cmd_voffset = VOFFSET $@ - cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ - -targets += voffset.h -$(obj)/voffset.h: vmlinux FORCE - $(call if_changed,voffset) - -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ @@ -106,7 +97,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE AFLAGS_header.o += -I$(obj) -$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h +$(obj)/header.o: $(obj)/zoffset.h LDFLAGS_setup.elf := -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 8774cb23064f..cfdd8c3f8af2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -57,12 +57,27 @@ LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' + +quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ + +targets += ../voffset.h + +$(obj)/../voffset.h: vmlinux FORCE + $(call if_changed,voffset) + +$(obj)/misc.o: $(obj)/../voffset.h + vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ - $(obj)/string.o $(obj)/cmdline.o \ + $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \ $(obj)/piggy.o $(obj)/cpuflags.o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o -vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o +vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o +ifdef CONFIG_X86_64 + vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o +endif $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone @@ -109,10 +124,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 -RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ - $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) quiet_cmd_mkpiggy = MKPIGGY $@ - cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) + cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) targets += piggy.S $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c deleted file mode 100644 index 6a9b96b4624d..000000000000 --- a/arch/x86/boot/compressed/aslr.c +++ /dev/null @@ -1,339 +0,0 @@ -#include "misc.h" - -#include <asm/msr.h> -#include <asm/archrandom.h> -#include <asm/e820.h> - -#include <generated/compile.h> -#include <linux/module.h> -#include <linux/uts.h> -#include <linux/utsname.h> -#include <generated/utsrelease.h> - -/* Simplified build-specific string for starting entropy. */ -static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; - -#define I8254_PORT_CONTROL 0x43 -#define I8254_PORT_COUNTER0 0x40 -#define I8254_CMD_READBACK 0xC0 -#define I8254_SELECT_COUNTER0 0x02 -#define I8254_STATUS_NOTREADY 0x40 -static inline u16 i8254(void) -{ - u16 status, timer; - - do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); - status = inb(I8254_PORT_COUNTER0); - timer = inb(I8254_PORT_COUNTER0); - timer |= inb(I8254_PORT_COUNTER0) << 8; - } while (status & I8254_STATUS_NOTREADY); - - return timer; -} - -static unsigned long rotate_xor(unsigned long hash, const void *area, - size_t size) -{ - size_t i; - unsigned long *ptr = (unsigned long *)area; - - for (i = 0; i < size / sizeof(hash); i++) { - /* Rotate by odd number of bits and XOR. */ - hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); - hash ^= ptr[i]; - } - - return hash; -} - -/* Attempt to create a simple but unpredictable starting entropy. */ -static unsigned long get_random_boot(void) -{ - unsigned long hash = 0; - - hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); - - return hash; -} - -static unsigned long get_random_long(void) -{ -#ifdef CONFIG_X86_64 - const unsigned long mix_const = 0x5d6008cbf3848dd3UL; -#else - const unsigned long mix_const = 0x3f39e593UL; -#endif - unsigned long raw, random = get_random_boot(); - bool use_i8254 = true; - - debug_putstr("KASLR using"); - - if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr(" RDRAND"); - if (rdrand_long(&raw)) { - random ^= raw; - use_i8254 = false; - } - } - - if (has_cpuflag(X86_FEATURE_TSC)) { - debug_putstr(" RDTSC"); - raw = rdtsc(); - - random ^= raw; - use_i8254 = false; - } - - if (use_i8254) { - debug_putstr(" i8254"); - random ^= i8254(); - } - - /* Circular multiply for better bit diffusion */ - asm("mul %3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; - - debug_putstr("...\n"); - - return random; -} - -struct mem_vector { - unsigned long start; - unsigned long size; -}; - -#define MEM_AVOID_MAX 5 -static struct mem_vector mem_avoid[MEM_AVOID_MAX]; - -static bool mem_contains(struct mem_vector *region, struct mem_vector *item) -{ - /* Item at least partially before region. */ - if (item->start < region->start) - return false; - /* Item at least partially after region. */ - if (item->start + item->size > region->start + region->size) - return false; - return true; -} - -static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) -{ - /* Item one is entirely before item two. */ - if (one->start + one->size <= two->start) - return false; - /* Item one is entirely after item two. */ - if (one->start >= two->start + two->size) - return false; - return true; -} - -static void mem_avoid_init(unsigned long input, unsigned long input_size, - unsigned long output, unsigned long output_size) -{ - u64 initrd_start, initrd_size; - u64 cmd_line, cmd_line_size; - unsigned long unsafe, unsafe_len; - char *ptr; - - /* - * Avoid the region that is unsafe to overlap during - * decompression (see calculations at top of misc.c). - */ - unsafe_len = (output_size >> 12) + 32768 + 18; - unsafe = (unsigned long)input + input_size - unsafe_len; - mem_avoid[0].start = unsafe; - mem_avoid[0].size = unsafe_len; - - /* Avoid initrd. */ - initrd_start = (u64)real_mode->ext_ramdisk_image << 32; - initrd_start |= real_mode->hdr.ramdisk_image; - initrd_size = (u64)real_mode->ext_ramdisk_size << 32; - initrd_size |= real_mode->hdr.ramdisk_size; - mem_avoid[1].start = initrd_start; - mem_avoid[1].size = initrd_size; - - /* Avoid kernel command line. */ - cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; - cmd_line |= real_mode->hdr.cmd_line_ptr; - /* Calculate size of cmd_line. */ - ptr = (char *)(unsigned long)cmd_line; - for (cmd_line_size = 0; ptr[cmd_line_size++]; ) - ; - mem_avoid[2].start = cmd_line; - mem_avoid[2].size = cmd_line_size; - - /* Avoid heap memory. */ - mem_avoid[3].start = (unsigned long)free_mem_ptr; - mem_avoid[3].size = BOOT_HEAP_SIZE; - - /* Avoid stack memory. */ - mem_avoid[4].start = (unsigned long)free_mem_end_ptr; - mem_avoid[4].size = BOOT_STACK_SIZE; -} - -/* Does this memory vector overlap a known avoided area? */ -static bool mem_avoid_overlap(struct mem_vector *img) -{ - int i; - struct setup_data *ptr; - - for (i = 0; i < MEM_AVOID_MAX; i++) { - if (mem_overlaps(img, &mem_avoid[i])) - return true; - } - - /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; - while (ptr) { - struct mem_vector avoid; - - avoid.start = (unsigned long)ptr; - avoid.size = sizeof(*ptr) + ptr->len; - - if (mem_overlaps(img, &avoid)) - return true; - - ptr = (struct setup_data *)(unsigned long)ptr->next; - } - - return false; -} - -static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / - CONFIG_PHYSICAL_ALIGN]; -static unsigned long slot_max; - -static void slots_append(unsigned long addr) -{ - /* Overflowing the slots list should be impossible. */ - if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / - CONFIG_PHYSICAL_ALIGN) - return; - - slots[slot_max++] = addr; -} - -static unsigned long slots_fetch_random(void) -{ - /* Handle case of no slots stored. */ - if (slot_max == 0) - return 0; - - return slots[get_random_long() % slot_max]; -} - -static void process_e820_entry(struct e820entry *entry, - unsigned long minimum, - unsigned long image_size) -{ - struct mem_vector region, img; - - /* Skip non-RAM entries. */ - if (entry->type != E820_RAM) - return; - - /* Ignore entries entirely above our maximum. */ - if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) - return; - - /* Ignore entries entirely below our minimum. */ - if (entry->addr + entry->size < minimum) - return; - - region.start = entry->addr; - region.size = entry->size; - - /* Potentially raise address to minimum location. */ - if (region.start < minimum) - region.start = minimum; - - /* Potentially raise address to meet alignment requirements. */ - region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - - /* Did we raise the address above the bounds of this e820 region? */ - if (region.start > entry->addr + entry->size) - return; - - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; - - /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) - region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; - - /* Walk each aligned slot and check for avoided areas. */ - for (img.start = region.start, img.size = image_size ; - mem_contains(®ion, &img) ; - img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img)) - continue; - slots_append(img.start); - } -} - -static unsigned long find_random_addr(unsigned long minimum, - unsigned long size) -{ - int i; - unsigned long addr; - - /* Make sure minimum is aligned. */ - minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); - - /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < real_mode->e820_entries; i++) { - process_e820_entry(&real_mode->e820_map[i], minimum, size); - } - - return slots_fetch_random(); -} - -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, - unsigned long input_size, - unsigned char *output, - unsigned long output_size) -{ - unsigned long choice = (unsigned long)output; - unsigned long random; - -#ifdef CONFIG_HIBERNATION - if (!cmdline_find_option_bool("kaslr")) { - debug_putstr("KASLR disabled by default...\n"); - goto out; - } -#else - if (cmdline_find_option_bool("nokaslr")) { - debug_putstr("KASLR disabled by cmdline...\n"); - goto out; - } -#endif - - boot_params->hdr.loadflags |= KASLR_FLAG; - - /* Record the various known unsafe memory ranges. */ - mem_avoid_init((unsigned long)input, input_size, - (unsigned long)output, output_size); - - /* Walk e820 and find a random address. */ - random = find_random_addr(choice, output_size); - if (!random) { - debug_putstr("KASLR could not find suitable E820 region...\n"); - goto out; - } - - /* Always enforce the minimum. */ - if (random < choice) - goto out; - - choice = random; -out: - return (unsigned char *)choice; -} diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index b68e3033e6b9..73ccf63b0f48 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -15,9 +15,9 @@ static inline char rdfs8(addr_t addr) #include "../cmdline.c" static unsigned long get_cmd_line_ptr(void) { - unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; - cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; return cmd_line_ptr; } diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 583d539a4197..52fef606bc54 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -571,312 +571,6 @@ free_handle: efi_call_early(free_pool, pci_handle); } -static void -setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, - struct efi_pixel_bitmask pixel_info, int pixel_format) -{ - if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { - si->lfb_depth = 32; - si->lfb_linelength = pixels_per_scan_line * 4; - si->red_size = 8; - si->red_pos = 0; - si->green_size = 8; - si->green_pos = 8; - si->blue_size = 8; - si->blue_pos = 16; - si->rsvd_size = 8; - si->rsvd_pos = 24; - } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) { - si->lfb_depth = 32; - si->lfb_linelength = pixels_per_scan_line * 4; - si->red_size = 8; - si->red_pos = 16; - si->green_size = 8; - si->green_pos = 8; - si->blue_size = 8; - si->blue_pos = 0; - si->rsvd_size = 8; - si->rsvd_pos = 24; - } else if (pixel_format == PIXEL_BIT_MASK) { - find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size); - find_bits(pixel_info.green_mask, &si->green_pos, - &si->green_size); - find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size); - find_bits(pixel_info.reserved_mask, &si->rsvd_pos, - &si->rsvd_size); - si->lfb_depth = si->red_size + si->green_size + - si->blue_size + si->rsvd_size; - si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8; - } else { - si->lfb_depth = 4; - si->lfb_linelength = si->lfb_width / 2; - si->red_size = 0; - si->red_pos = 0; - si->green_size = 0; - si->green_pos = 0; - si->blue_size = 0; - si->blue_pos = 0; - si->rsvd_size = 0; - si->rsvd_pos = 0; - } -} - -static efi_status_t -__gop_query32(struct efi_graphics_output_protocol_32 *gop32, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_32 *mode; - efi_status_t status; - unsigned long m; - - m = gop32->mode; - mode = (struct efi_graphics_output_protocol_mode_32 *)m; - - status = efi_early->call(gop32->query_mode, gop32, - mode->mode, size, info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - -static efi_status_t -setup_gop32(struct screen_info *si, efi_guid_t *proto, - unsigned long size, void **gop_handle) -{ - struct efi_graphics_output_protocol_32 *gop32, *first_gop; - unsigned long nr_gops; - u16 width, height; - u32 pixels_per_scan_line; - u32 ext_lfb_base; - u64 fb_base; - struct efi_pixel_bitmask pixel_info; - int pixel_format; - efi_status_t status; - u32 *handles = (u32 *)(unsigned long)gop_handle; - int i; - - first_gop = NULL; - gop32 = NULL; - - nr_gops = size / sizeof(u32); - for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_mode_info *info = NULL; - efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; - bool conout_found = false; - void *dummy = NULL; - u32 h = handles[i]; - u64 current_fb_base; - - status = efi_call_early(handle_protocol, h, - proto, (void **)&gop32); - if (status != EFI_SUCCESS) - continue; - - status = efi_call_early(handle_protocol, h, - &conout_proto, &dummy); - if (status == EFI_SUCCESS) - conout_found = true; - - status = __gop_query32(gop32, &info, &size, ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { - /* - * Systems that use the UEFI Console Splitter may - * provide multiple GOP devices, not all of which are - * backed by real hardware. The workaround is to search - * for a GOP implementing the ConOut protocol, and if - * one isn't found, to just fall back to the first GOP. - */ - width = info->horizontal_resolution; - height = info->vertical_resolution; - pixel_format = info->pixel_format; - pixel_info = info->pixel_information; - pixels_per_scan_line = info->pixels_per_scan_line; - fb_base = current_fb_base; - - /* - * Once we've found a GOP supporting ConOut, - * don't bother looking any further. - */ - first_gop = gop32; - if (conout_found) - break; - } - } - - /* Did we find any GOPs? */ - if (!first_gop) - goto out; - - /* EFI framebuffer */ - si->orig_video_isVGA = VIDEO_TYPE_EFI; - - si->lfb_width = width; - si->lfb_height = height; - si->lfb_base = fb_base; - - ext_lfb_base = (u64)(unsigned long)fb_base >> 32; - if (ext_lfb_base) { - si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - si->ext_lfb_base = ext_lfb_base; - } - - si->pages = 1; - - setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); - - si->lfb_size = si->lfb_linelength * si->lfb_height; - - si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: - return status; -} - -static efi_status_t -__gop_query64(struct efi_graphics_output_protocol_64 *gop64, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_64 *mode; - efi_status_t status; - unsigned long m; - - m = gop64->mode; - mode = (struct efi_graphics_output_protocol_mode_64 *)m; - - status = efi_early->call(gop64->query_mode, gop64, - mode->mode, size, info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - -static efi_status_t -setup_gop64(struct screen_info *si, efi_guid_t *proto, - unsigned long size, void **gop_handle) -{ - struct efi_graphics_output_protocol_64 *gop64, *first_gop; - unsigned long nr_gops; - u16 width, height; - u32 pixels_per_scan_line; - u32 ext_lfb_base; - u64 fb_base; - struct efi_pixel_bitmask pixel_info; - int pixel_format; - efi_status_t status; - u64 *handles = (u64 *)(unsigned long)gop_handle; - int i; - - first_gop = NULL; - gop64 = NULL; - - nr_gops = size / sizeof(u64); - for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_mode_info *info = NULL; - efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; - bool conout_found = false; - void *dummy = NULL; - u64 h = handles[i]; - u64 current_fb_base; - - status = efi_call_early(handle_protocol, h, - proto, (void **)&gop64); - if (status != EFI_SUCCESS) - continue; - - status = efi_call_early(handle_protocol, h, - &conout_proto, &dummy); - if (status == EFI_SUCCESS) - conout_found = true; - - status = __gop_query64(gop64, &info, &size, ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { - /* - * Systems that use the UEFI Console Splitter may - * provide multiple GOP devices, not all of which are - * backed by real hardware. The workaround is to search - * for a GOP implementing the ConOut protocol, and if - * one isn't found, to just fall back to the first GOP. - */ - width = info->horizontal_resolution; - height = info->vertical_resolution; - pixel_format = info->pixel_format; - pixel_info = info->pixel_information; - pixels_per_scan_line = info->pixels_per_scan_line; - fb_base = current_fb_base; - - /* - * Once we've found a GOP supporting ConOut, - * don't bother looking any further. - */ - first_gop = gop64; - if (conout_found) - break; - } - } - - /* Did we find any GOPs? */ - if (!first_gop) - goto out; - - /* EFI framebuffer */ - si->orig_video_isVGA = VIDEO_TYPE_EFI; - - si->lfb_width = width; - si->lfb_height = height; - si->lfb_base = fb_base; - - ext_lfb_base = (u64)(unsigned long)fb_base >> 32; - if (ext_lfb_base) { - si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - si->ext_lfb_base = ext_lfb_base; - } - - si->pages = 1; - - setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); - - si->lfb_size = si->lfb_linelength * si->lfb_height; - - si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: - return status; -} - -/* - * See if we have Graphics Output Protocol - */ -static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, - unsigned long size) -{ - efi_status_t status; - void **gop_handle = NULL; - - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)&gop_handle); - if (status != EFI_SUCCESS) - return status; - - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - proto, NULL, &size, gop_handle); - if (status != EFI_SUCCESS) - goto free_handle; - - if (efi_early->is64) - status = setup_gop64(si, proto, size, gop_handle); - else - status = setup_gop32(si, proto, size, gop_handle); - -free_handle: - efi_call_early(free_pool, gop_handle); - return status; -} - static efi_status_t setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) { @@ -1038,7 +732,7 @@ void setup_graphics(struct boot_params *boot_params) EFI_LOCATE_BY_PROTOCOL, &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) - status = setup_gop(si, &graphics_proto, size); + status = efi_setup_gop(NULL, si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index d487e727f1ec..c0223f1a89d7 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -11,80 +11,6 @@ #define DESC_TYPE_CODE_DATA (1 << 0) -#define EFI_CONSOLE_OUT_DEVICE_GUID \ - EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ - 0x3f, 0xc1, 0x4d) - -#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 -#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 -#define PIXEL_BIT_MASK 2 -#define PIXEL_BLT_ONLY 3 -#define PIXEL_FORMAT_MAX 4 - -struct efi_pixel_bitmask { - u32 red_mask; - u32 green_mask; - u32 blue_mask; - u32 reserved_mask; -}; - -struct efi_graphics_output_mode_info { - u32 version; - u32 horizontal_resolution; - u32 vertical_resolution; - int pixel_format; - struct efi_pixel_bitmask pixel_information; - u32 pixels_per_scan_line; -} __packed; - -struct efi_graphics_output_protocol_mode_32 { - u32 max_mode; - u32 mode; - u32 info; - u32 size_of_info; - u64 frame_buffer_base; - u32 frame_buffer_size; -} __packed; - -struct efi_graphics_output_protocol_mode_64 { - u32 max_mode; - u32 mode; - u64 info; - u64 size_of_info; - u64 frame_buffer_base; - u64 frame_buffer_size; -} __packed; - -struct efi_graphics_output_protocol_mode { - u32 max_mode; - u32 mode; - unsigned long info; - unsigned long size_of_info; - u64 frame_buffer_base; - unsigned long frame_buffer_size; -} __packed; - -struct efi_graphics_output_protocol_32 { - u32 query_mode; - u32 set_mode; - u32 blt; - u32 mode; -}; - -struct efi_graphics_output_protocol_64 { - u64 query_mode; - u64 set_mode; - u64 blt; - u64 mode; -}; - -struct efi_graphics_output_protocol { - void *query_mode; - unsigned long set_mode; - unsigned long blt; - struct efi_graphics_output_protocol_mode *mode; -}; - struct efi_uga_draw_protocol_32 { u32 get_mode; u32 set_mode; diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c new file mode 100644 index 000000000000..6248740b68b5 --- /dev/null +++ b/arch/x86/boot/compressed/error.c @@ -0,0 +1,22 @@ +/* + * Callers outside of misc.c need access to the error reporting routines, + * but the *_putstr() functions need to stay in misc.c because of how + * memcpy() and memmove() are defined for the compressed boot environment. + */ +#include "misc.h" + +void warn(char *m) +{ + error_putstr("\n\n"); + error_putstr(m); + error_putstr("\n\n"); +} + +void error(char *m) +{ + warn(m); + error_putstr(" -- System halted"); + + while (1) + asm("hlt"); +} diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h new file mode 100644 index 000000000000..2e59dac07f9e --- /dev/null +++ b/arch/x86/boot/compressed/error.h @@ -0,0 +1,7 @@ +#ifndef BOOT_COMPRESSED_ERROR_H +#define BOOT_COMPRESSED_ERROR_H + +void warn(char *m); +void error(char *m); + +#endif /* BOOT_COMPRESSED_ERROR_H */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 0256064da8da..1038524270e7 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -176,7 +176,9 @@ preferred_addr: 1: /* Target address to relocate to for decompression */ - addl $z_extract_offset, %ebx + movl BP_init_size(%esi), %eax + subl $_end, %eax + addl %eax, %ebx /* Set up the stack */ leal boot_stack_end(%ebx), %esp @@ -233,24 +235,28 @@ relocated: 2: /* - * Do the decompression, and jump to the new kernel.. + * Do the extraction, and jump to the new kernel.. */ - /* push arguments for decompress_kernel: */ - pushl $z_run_size /* size of kernel with .bss and .brk */ + /* push arguments for extract_kernel: */ pushl $z_output_len /* decompressed length, end of relocs */ - leal z_extract_offset_negative(%ebx), %ebp + + movl BP_init_size(%esi), %eax + subl $_end, %eax + movl %ebx, %ebp + subl %eax, %ebp pushl %ebp /* output address */ + pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax pushl %eax /* input_data */ leal boot_heap(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call decompress_kernel /* returns kernel location in %eax */ - addl $28, %esp + call extract_kernel /* returns kernel location in %eax */ + addl $24, %esp /* - * Jump to the decompressed kernel. + * Jump to the extracted kernel. */ xorl %ebx, %ebx jmp *%eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 86558a199139..0d80a7ad65cd 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -110,7 +110,9 @@ ENTRY(startup_32) 1: /* Target address to relocate to for decompression */ - addl $z_extract_offset, %ebx + movl BP_init_size(%esi), %eax + subl $_end, %eax + addl %eax, %ebx /* * Prepare for entering 64 bit mode @@ -132,7 +134,7 @@ ENTRY(startup_32) /* Initialize Page tables to 0 */ leal pgtable(%ebx), %edi xorl %eax, %eax - movl $((4096*6)/4), %ecx + movl $(BOOT_INIT_PGT_SIZE/4), %ecx rep stosl /* Build Level 4 */ @@ -338,7 +340,9 @@ preferred_addr: 1: /* Target address to relocate to for decompression */ - leaq z_extract_offset(%rbp), %rbx + movl BP_init_size(%rsi), %ebx + subl $_end, %ebx + addq %rbp, %rbx /* Set up the stack */ leaq boot_stack_end(%rbx), %rsp @@ -408,19 +412,16 @@ relocated: 2: /* - * Do the decompression, and jump to the new kernel.. + * Do the extraction, and jump to the new kernel.. */ pushq %rsi /* Save the real mode argument */ - movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ - pushq %r9 movq %rsi, %rdi /* real mode address */ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ leaq input_data(%rip), %rdx /* input_data */ movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length, end of relocs */ - call decompress_kernel /* returns kernel location in %rax */ - popq %r9 + call extract_kernel /* returns kernel location in %rax */ popq %rsi /* @@ -485,4 +486,4 @@ boot_stack_end: .section ".pgtable","a",@nobits .balign 4096 pgtable: - .fill 6*4096, 1, 0 + .fill BOOT_PGT_SIZE, 1, 0 diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c new file mode 100644 index 000000000000..cfeb0259ed81 --- /dev/null +++ b/arch/x86/boot/compressed/kaslr.c @@ -0,0 +1,510 @@ +/* + * kaslr.c + * + * This contains the routines needed to generate a reasonable level of + * entropy to choose a randomized kernel base address offset in support + * of Kernel Address Space Layout Randomization (KASLR). Additionally + * handles walking the physical memory maps (and tracking memory regions + * to avoid) in order to select a physical memory location that can + * contain the entire properly aligned running kernel image. + * + */ +#include "misc.h" +#include "error.h" + +#include <asm/msr.h> +#include <asm/archrandom.h> +#include <asm/e820.h> + +#include <generated/compile.h> +#include <linux/module.h> +#include <linux/uts.h> +#include <linux/utsname.h> +#include <generated/utsrelease.h> + +/* Simplified build-specific string for starting entropy. */ +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + unsigned long *ptr = (unsigned long *)area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple but unpredictable starting entropy. */ +static unsigned long get_random_boot(void) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); + + return hash; +} + +static unsigned long get_random_long(const char *purpose) +{ +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif + unsigned long raw, random = get_random_boot(); + bool use_i8254 = true; + + debug_putstr(purpose); + debug_putstr(" KASLR using"); + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + debug_putstr(" RDTSC"); + raw = rdtsc(); + + random ^= raw; + use_i8254 = false; + } + + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + + debug_putstr("...\n"); + + return random; +} + +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +enum mem_avoid_index { + MEM_AVOID_ZO_RANGE = 0, + MEM_AVOID_INITRD, + MEM_AVOID_CMDLINE, + MEM_AVOID_BOOTPARAMS, + MEM_AVOID_MAX, +}; + +static struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. */ + if (one->start >= two->start + two->size) + return false; + return true; +} + +/* + * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). + * The mem_avoid array is used to store the ranges that need to be avoided + * when KASLR searches for an appropriate random address. We must avoid any + * regions that are unsafe to overlap with during decompression, and other + * things like the initrd, cmdline and boot_params. This comment seeks to + * explain mem_avoid as clearly as possible since incorrect mem_avoid + * memory ranges lead to really hard to debug boot failures. + * + * The initrd, cmdline, and boot_params are trivial to identify for + * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and + * MEM_AVOID_BOOTPARAMS respectively below. + * + * What is not obvious how to avoid is the range of memory that is used + * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover + * the compressed kernel (ZO) and its run space, which is used to extract + * the uncompressed kernel (VO) and relocs. + * + * ZO's full run size sits against the end of the decompression buffer, so + * we can calculate where text, data, bss, etc of ZO are positioned more + * easily. + * + * For additional background, the decompression calculations can be found + * in header.S, and the memory diagram is based on the one found in misc.c. + * + * The following conditions are already enforced by the image layouts and + * associated code: + * - input + input_size >= output + output_size + * - kernel_total_size <= init_size + * - kernel_total_size <= output_size (see Note below) + * - output + init_size >= output + output_size + * + * (Note that kernel_total_size and output_size have no fundamental + * relationship, but output_size is passed to choose_random_location + * as a maximum of the two. The diagram is showing a case where + * kernel_total_size is larger than output_size, but this case is + * handled by bumping output_size.) + * + * The above conditions can be illustrated by a diagram: + * + * 0 output input input+input_size output+init_size + * | | | | | + * | | | | | + * |-----|--------|--------|--------------|-----------|--|-------------| + * | | | + * | | | + * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size + * + * [output, output+init_size) is the entire memory range used for + * extracting the compressed image. + * + * [output, output+kernel_total_size) is the range needed for the + * uncompressed kernel (VO) and its run size (bss, brk, etc). + * + * [output, output+output_size) is VO plus relocs (i.e. the entire + * uncompressed payload contained by ZO). This is the area of the buffer + * written to during decompression. + * + * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case + * range of the copied ZO and decompression code. (i.e. the range + * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.) + * + * [input, input+input_size) is the original copied compressed image (ZO) + * (i.e. it does not include its run size). This range must be avoided + * because it contains the data used for decompression. + * + * [input+input_size, output+init_size) is [_text, _end) for ZO. This + * range includes ZO's heap and stack, and must be avoided since it + * performs the decompression. + * + * Since the above two ranges need to be avoided and they are adjacent, + * they can be merged, resulting in: [input, output+init_size) which + * becomes the MEM_AVOID_ZO_RANGE below. + */ +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output) +{ + unsigned long init_size = boot_params->hdr.init_size; + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression. + */ + mem_avoid[MEM_AVOID_ZO_RANGE].start = input; + mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; + add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, + mem_avoid[MEM_AVOID_ZO_RANGE].size); + + /* Avoid initrd. */ + initrd_start = (u64)boot_params->ext_ramdisk_image << 32; + initrd_start |= boot_params->hdr.ramdisk_image; + initrd_size = (u64)boot_params->ext_ramdisk_size << 32; + initrd_size |= boot_params->hdr.ramdisk_size; + mem_avoid[MEM_AVOID_INITRD].start = initrd_start; + mem_avoid[MEM_AVOID_INITRD].size = initrd_size; + /* No need to set mapping for initrd, it will be handled in VO. */ + + /* Avoid kernel command line. */ + cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; + cmd_line |= boot_params->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; + mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; + add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, + mem_avoid[MEM_AVOID_CMDLINE].size); + + /* Avoid boot parameters. */ + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, + mem_avoid[MEM_AVOID_BOOTPARAMS].size); + + /* We don't need to set a mapping for setup_data. */ + +#ifdef CONFIG_X86_VERBOSE_BOOTUP + /* Make sure video RAM can be used. */ + add_identity_map(0, PMD_SIZE); +#endif +} + +/* + * Does this memory vector overlap a known avoided area? If so, record the + * overlap region with the lowest address. + */ +static bool mem_avoid_overlap(struct mem_vector *img, + struct mem_vector *overlap) +{ + int i; + struct setup_data *ptr; + unsigned long earliest = img->start + img->size; + bool is_overlapping = false; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i]) && + mem_avoid[i].start < earliest) { + *overlap = mem_avoid[i]; + is_overlapping = true; + } + } + + /* Avoid all entries in the setup_data linked list. */ + ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + + avoid.start = (unsigned long)ptr; + avoid.size = sizeof(*ptr) + ptr->len; + + if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { + *overlap = avoid; + is_overlapping = true; + } + + ptr = (struct setup_data *)(unsigned long)ptr->next; + } + + return is_overlapping; +} + +static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; + +struct slot_area { + unsigned long addr; + int num; +}; + +#define MAX_SLOT_AREA 100 + +static struct slot_area slot_areas[MAX_SLOT_AREA]; + +static unsigned long slot_max; + +static unsigned long slot_area_index; + +static void store_slot_info(struct mem_vector *region, unsigned long image_size) +{ + struct slot_area slot_area; + + if (slot_area_index == MAX_SLOT_AREA) + return; + + slot_area.addr = region->start; + slot_area.num = (region->size - image_size) / + CONFIG_PHYSICAL_ALIGN + 1; + + if (slot_area.num > 0) { + slot_areas[slot_area_index++] = slot_area; + slot_max += slot_area.num; + } +} + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. */ + if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long("Physical") % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img, overlap; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= KERNEL_IMAGE_SIZE) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > KERNEL_IMAGE_SIZE) + region.size = KERNEL_IMAGE_SIZE - region.start; + + /* Walk each aligned slot and check for avoided areas. */ + for (img.start = region.start, img.size = image_size ; + mem_contains(®ion, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img, &overlap)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_phys_addr(unsigned long minimum, + unsigned long image_size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < boot_params->e820_entries; i++) { + process_e820_entry(&boot_params->e820_map[i], minimum, + image_size); + } + + return slots_fetch_random(); +} + +static unsigned long find_random_virt_addr(unsigned long minimum, + unsigned long image_size) +{ + unsigned long slots, random_addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + /* Align image_size for easy slot calculations. */ + image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN); + + /* + * There are how many CONFIG_PHYSICAL_ALIGN-sized slots + * that can hold image_size within the range of minimum to + * KERNEL_IMAGE_SIZE? + */ + slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / + CONFIG_PHYSICAL_ALIGN + 1; + + random_addr = get_random_long("Virtual") % slots; + + return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; +} + +/* + * Since this function examines addresses much more numerically, + * it takes the input and output pointers as 'unsigned long'. + */ +unsigned char *choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long output, + unsigned long output_size) +{ + unsigned long choice = output; + unsigned long random_addr; + +#ifdef CONFIG_HIBERNATION + if (!cmdline_find_option_bool("kaslr")) { + warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); + goto out; + } +#else + if (cmdline_find_option_bool("nokaslr")) { + warn("KASLR disabled: 'nokaslr' on cmdline."); + goto out; + } +#endif + + boot_params->hdr.loadflags |= KASLR_FLAG; + + /* Record the various known unsafe memory ranges. */ + mem_avoid_init(input, input_size, output); + + /* Walk e820 and find a random address. */ + random_addr = find_random_phys_addr(output, output_size); + if (!random_addr) { + warn("KASLR disabled: could not find suitable E820 region!"); + goto out; + } + + /* Always enforce the minimum. */ + if (random_addr < choice) + goto out; + + choice = random_addr; + + add_identity_map(choice, output_size); + + /* This actually loads the identity pagetable on x86_64. */ + finalize_identity_maps(); +out: + return (unsigned char *)choice; +} diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 79dac1758e7c..f14db4e21654 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -1,8 +1,10 @@ /* * misc.c * - * This is a collection of several routines from gzip-1.0.3 - * adapted for Linux. + * This is a collection of several routines used to extract the kernel + * which includes KASLR relocation, decompression, ELF parsing, and + * relocation processing. Additionally included are the screen and serial + * output functions and related debugging support functions. * * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 * puts by Nick Holloway 1993, better puts by Martin Mares 1995 @@ -10,111 +12,37 @@ */ #include "misc.h" +#include "error.h" #include "../string.h" - -/* WARNING!! - * This code is compiled with -fPIC and it is relocated dynamically - * at run time, but no relocation processing is performed. - * This means that it is not safe to place pointers in static structures. - */ +#include "../voffset.h" /* - * Getting to provable safe in place decompression is hard. - * Worst case behaviours need to be analyzed. - * Background information: - * - * The file layout is: - * magic[2] - * method[1] - * flags[1] - * timestamp[4] - * extraflags[1] - * os[1] - * compressed data blocks[N] - * crc[4] orig_len[4] - * - * resulting in 18 bytes of non compressed data overhead. - * - * Files divided into blocks - * 1 bit (last block flag) - * 2 bits (block type) - * - * 1 block occurs every 32K -1 bytes or when there 50% compression - * has been achieved. The smallest block type encoding is always used. - * - * stored: - * 32 bits length in bytes. - * - * fixed: - * magic fixed tree. - * symbols. - * - * dynamic: - * dynamic tree encoding. - * symbols. - * - * - * The buffer for decompression in place is the length of the - * uncompressed data, plus a small amount extra to keep the algorithm safe. - * The compressed data is placed at the end of the buffer. The output - * pointer is placed at the start of the buffer and the input pointer - * is placed where the compressed data starts. Problems will occur - * when the output pointer overruns the input pointer. - * - * The output pointer can only overrun the input pointer if the input - * pointer is moving faster than the output pointer. A condition only - * triggered by data whose compressed form is larger than the uncompressed - * form. - * - * The worst case at the block level is a growth of the compressed data - * of 5 bytes per 32767 bytes. - * - * The worst case internal to a compressed block is very hard to figure. - * The worst case can at least be boundined by having one bit that represents - * 32764 bytes and then all of the rest of the bytes representing the very - * very last byte. - * - * All of which is enough to compute an amount of extra data that is required - * to be safe. To avoid problems at the block level allocating 5 extra bytes - * per 32767 bytes of data is sufficient. To avoind problems internal to a - * block adding an extra 32767 bytes (the worst case uncompressed block size) - * is sufficient, to ensure that in the worst case the decompressed data for - * block will stop the byte before the compressed data for a block begins. - * To avoid problems with the compressed data's meta information an extra 18 - * bytes are needed. Leading to the formula: - * - * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. - * - * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. - * Adding 32768 instead of 32767 just makes for round numbers. - * Adding the decompressor_size is necessary as it musht live after all - * of the data as well. Last I measured the decompressor is about 14K. - * 10K of actual data and 4K of bss. - * + * WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically at + * run time, but no relocation processing is performed. This means that + * it is not safe to place pointers in static structures. */ -/* - * gzip declarations - */ +/* Macros used by the included decompressor code below. */ #define STATIC static -#undef memcpy - /* - * Use a normal definition of memset() from string.c. There are already + * Use normal definitions of mem*() from string.c. There are already * included header files which expect a definition of memset() and by * the time we define memset macro, it is too late. */ +#undef memcpy #undef memset #define memzero(s, n) memset((s), 0, (n)) +#define memmove memmove - -static void error(char *m); +/* Functions used by the included decompressor code below. */ +void *memmove(void *dest, const void *src, size_t n); /* * This is set up by the setup-routine at boot-time */ -struct boot_params *real_mode; /* Pointer to real-mode data */ +struct boot_params *boot_params; memptr free_mem_ptr; memptr free_mem_end_ptr; @@ -146,12 +74,16 @@ static int lines, cols; #ifdef CONFIG_KERNEL_LZ4 #include "../../../../lib/decompress_unlz4.c" #endif +/* + * NOTE: When adding a new decompressor, please update the analysis in + * ../header.S. + */ static void scroll(void) { int i; - memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); + memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) vidmem[i] = ' '; } @@ -184,12 +116,12 @@ void __putstr(const char *s) } } - if (real_mode->screen_info.orig_video_mode == 0 && + if (boot_params->screen_info.orig_video_mode == 0 && lines == 0 && cols == 0) return; - x = real_mode->screen_info.orig_x; - y = real_mode->screen_info.orig_y; + x = boot_params->screen_info.orig_x; + y = boot_params->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -210,8 +142,8 @@ void __putstr(const char *s) } } - real_mode->screen_info.orig_x = x; - real_mode->screen_info.orig_y = y; + boot_params->screen_info.orig_x = x; + boot_params->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -237,23 +169,13 @@ void __puthex(unsigned long value) } } -static void error(char *x) -{ - error_putstr("\n\n"); - error_putstr(x); - error_putstr("\n\n -- System halted"); - - while (1) - asm("hlt"); -} - #if CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len) { int *reloc; unsigned long delta, map, ptr; unsigned long min_addr = (unsigned long)output; - unsigned long max_addr = min_addr + output_len; + unsigned long max_addr = min_addr + (VO___bss_start - VO__text); /* * Calculate the delta between where vmlinux was linked to load @@ -295,7 +217,7 @@ static void handle_relocations(void *output, unsigned long output_len) * So we work backwards from the end of the decompressed image. */ for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { - int extended = *reloc; + long extended = *reloc; extended += map; ptr = (unsigned long)extended; @@ -372,9 +294,7 @@ static void parse_elf(void *output) #else dest = (void *)(phdr->p_paddr); #endif - memcpy(dest, - output + phdr->p_offset, - phdr->p_filesz); + memmove(dest, output + phdr->p_offset, phdr->p_filesz); break; default: /* Ignore other PT_* */ break; } @@ -383,23 +303,41 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, +/* + * The compressed kernel image (ZO), has been moved so that its position + * is against the end of the buffer used to hold the uncompressed kernel + * image (VO) and the execution environment (.bss, .brk), which makes sure + * there is room to do the in-place decompression. (See header.S for the + * calculations.) + * + * |-----compressed kernel image------| + * V V + * 0 extract_offset +INIT_SIZE + * |-----------|---------------|-------------------------|--------| + * | | | | + * VO__text startup_32 of ZO VO__end ZO__end + * ^ ^ + * |-------uncompressed kernel image---------| + * + */ +asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, - unsigned long output_len, - unsigned long run_size) + unsigned long output_len) { + const unsigned long kernel_total_size = VO__end - VO__text; unsigned char *output_orig = output; - real_mode = rmode; + /* Retain x86 boot parameters pointer passed from startup_32/64. */ + boot_params = rmode; - /* Clear it for solely in-kernel use */ - real_mode->hdr.loadflags &= ~KASLR_FLAG; + /* Clear flags intended for solely in-kernel use. */ + boot_params->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(real_mode); + sanitize_boot_params(boot_params); - if (real_mode->screen_info.orig_video_mode == 7) { + if (boot_params->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -407,11 +345,11 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = real_mode->screen_info.orig_video_lines; - cols = real_mode->screen_info.orig_video_cols; + lines = boot_params->screen_info.orig_video_lines; + cols = boot_params->screen_info.orig_video_cols; console_init(); - debug_putstr("early console in decompress_kernel\n"); + debug_putstr("early console in extract_kernel\n"); free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; @@ -421,16 +359,16 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, debug_putaddr(input_len); debug_putaddr(output); debug_putaddr(output_len); - debug_putaddr(run_size); + debug_putaddr(kernel_total_size); /* * The memory hole needed for the kernel is the larger of either * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_kernel_location(real_mode, input_data, input_len, output, - output_len > run_size ? output_len - : run_size); + output = choose_random_location((unsigned long)input_data, input_len, + (unsigned long)output, + max(output_len, kernel_total_size)); /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 3783dc3e10b3..b6fec1ff10e4 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -32,7 +32,7 @@ /* misc.c */ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; -extern struct boot_params *real_mode; /* Pointer to real-mode data */ +extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); #define error_putstr(__x) __putstr(__x) @@ -66,26 +66,35 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE -/* aslr.c */ -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, +/* kaslr.c */ +unsigned char *choose_random_location(unsigned long input_ptr, unsigned long input_size, - unsigned char *output, + unsigned long output_ptr, unsigned long output_size); /* cpuflags.c */ bool has_cpuflag(int flag); #else static inline -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, +unsigned char *choose_random_location(unsigned long input_ptr, unsigned long input_size, - unsigned char *output, + unsigned long output_ptr, unsigned long output_size) { - return output; + return (unsigned char *)output_ptr; } #endif +#ifdef CONFIG_X86_64 +void add_identity_map(unsigned long start, unsigned long size); +void finalize_identity_maps(void); +extern unsigned char _pgtable[]; +#else +static inline void add_identity_map(unsigned long start, unsigned long size) +{ } +static inline void finalize_identity_maps(void) +{ } +#endif + #ifdef CONFIG_EARLY_PRINTK /* early_serial_console.c */ extern int early_serial_base; diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index d8222f213182..72bad2c8debe 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -18,11 +18,10 @@ * * H. Peter Anvin <hpa@linux.intel.com> * - * ----------------------------------------------------------------------- */ - -/* - * Compute the desired load offset from a compressed program; outputs - * a small assembly wrapper with the appropriate symbols defined. + * ----------------------------------------------------------------------- + * + * Outputs a small assembly wrapper with the appropriate symbols defined. + * */ #include <stdlib.h> @@ -35,14 +34,11 @@ int main(int argc, char *argv[]) { uint32_t olen; long ilen; - unsigned long offs; - unsigned long run_size; FILE *f = NULL; int retval = 1; - if (argc < 3) { - fprintf(stderr, "Usage: %s compressed_file run_size\n", - argv[0]); + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); goto bail; } @@ -67,29 +63,11 @@ int main(int argc, char *argv[]) ilen = ftell(f); olen = get_unaligned_le32(&olen); - /* - * Now we have the input (compressed) and output (uncompressed) - * sizes, compute the necessary decompression offset... - */ - - offs = (olen > ilen) ? olen - ilen : 0; - offs += olen >> 12; /* Add 8 bytes for each 32K block */ - offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ - offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ - run_size = atoi(argv[2]); - printf(".section \".rodata..compressed\",\"a\",@progbits\n"); printf(".globl z_input_len\n"); printf("z_input_len = %lu\n", ilen); printf(".globl z_output_len\n"); printf("z_output_len = %lu\n", (unsigned long)olen); - printf(".globl z_extract_offset\n"); - printf("z_extract_offset = 0x%lx\n", offs); - /* z_extract_offset_negative allows simplification of head_32.S */ - printf(".globl z_extract_offset_negative\n"); - printf("z_extract_offset_negative = -0x%lx\n", offs); - printf(".globl z_run_size\n"); - printf("z_run_size = %lu\n", run_size); printf(".globl input_data, input_data_end\n"); printf("input_data:\n"); diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c new file mode 100644 index 000000000000..34b95df14e69 --- /dev/null +++ b/arch/x86/boot/compressed/pagetable.c @@ -0,0 +1,129 @@ +/* + * This code is used on x86_64 to create page table identity mappings on + * demand by building up a new set of page tables (or appending to the + * existing ones), and then switching over to them when ready. + */ + +/* + * Since we're dealing with identity mappings, physical and virtual + * addresses are the same, so override these defines which are ultimately + * used by the headers in misc.h. + */ +#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long)(x))) + +#include "misc.h" + +/* These actually do the work of building the kernel identity maps. */ +#include <asm/init.h> +#include <asm/pgtable.h> +#include "../../mm/ident_map.c" + +/* Used by pgtable.h asm code to force instruction serialization. */ +unsigned long __force_order; + +/* Used to track our page table allocation area. */ +struct alloc_pgt_data { + unsigned char *pgt_buf; + unsigned long pgt_buf_size; + unsigned long pgt_buf_offset; +}; + +/* + * Allocates space for a page table entry, using struct alloc_pgt_data + * above. Besides the local callers, this is used as the allocation + * callback in mapping_info below. + */ +static void *alloc_pgt_page(void *context) +{ + struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; + unsigned char *entry; + + /* Validate there is space available for a new page. */ + if (pages->pgt_buf_offset >= pages->pgt_buf_size) { + debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + return NULL; + } + + entry = pages->pgt_buf + pages->pgt_buf_offset; + pages->pgt_buf_offset += PAGE_SIZE; + + return entry; +} + +/* Used to track our allocated page tables. */ +static struct alloc_pgt_data pgt_data; + +/* The top level page table entry pointer. */ +static unsigned long level4p; + +/* Locates and clears a region for a new top level page table. */ +static void prepare_level4(void) +{ + /* + * It should be impossible for this not to already be true, + * but since calling this a second time would rewind the other + * counters, let's just make sure this is reset too. + */ + pgt_data.pgt_buf_offset = 0; + + /* + * If we came here via startup_32(), cr3 will be _pgtable already + * and we must append to the existing area instead of entirely + * overwriting it. + */ + level4p = read_cr3(); + if (level4p == (unsigned long)_pgtable) { + debug_putstr("booted via startup_32()\n"); + pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + } else { + debug_putstr("booted via startup_64()\n"); + pgt_data.pgt_buf = _pgtable; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + level4p = (unsigned long)alloc_pgt_page(&pgt_data); + } +} + +/* + * Adds the specified range to what will become the new identity mappings. + * Once all ranges have been added, the new mapping is activated by calling + * finalize_identity_maps() below. + */ +void add_identity_map(unsigned long start, unsigned long size) +{ + struct x86_mapping_info mapping_info = { + .alloc_pgt_page = alloc_pgt_page, + .context = &pgt_data, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + }; + unsigned long end = start + size; + + /* Make sure we have a top level page table ready to use. */ + if (!level4p) + prepare_level4(); + + /* Align boundary to 2M. */ + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + if (start >= end) + return; + + /* Build the mapping. */ + kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p, + start, end); +} + +/* + * This switches the page tables to the new level4 that has been built + * via calls to add_identity_map() above. If booted via startup_32(), + * this is effectively a no-op. + */ +void finalize_identity_maps(void) +{ + write_cr3(level4p); +} diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index 00e788be1db9..cea140ce6b42 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c @@ -1,7 +1,16 @@ +/* + * This provides an optimized implementation of memcpy, and a simplified + * implementation of memset and memmove. These are used here because the + * standard kernel runtime versions are not yet available and we don't + * trust the gcc built-in implementations as they may do unexpected things + * (e.g. FPU ops) in the minimal decompression stub execution environment. + */ +#include "error.h" + #include "../string.c" #ifdef CONFIG_X86_32 -void *memcpy(void *dest, const void *src, size_t n) +static void *__memcpy(void *dest, const void *src, size_t n) { int d0, d1, d2; asm volatile( @@ -15,7 +24,7 @@ void *memcpy(void *dest, const void *src, size_t n) return dest; } #else -void *memcpy(void *dest, const void *src, size_t n) +static void *__memcpy(void *dest, const void *src, size_t n) { long d0, d1, d2; asm volatile( @@ -39,3 +48,27 @@ void *memset(void *s, int c, size_t n) ss[i] = c; return s; } + +void *memmove(void *dest, const void *src, size_t n) +{ + unsigned char *d = dest; + const unsigned char *s = src; + + if (d <= s || d - s >= n) + return __memcpy(dest, src, n); + + while (n-- > 0) + d[n] = s[n]; + + return dest; +} + +/* Detect and warn about potential overlaps, but handle them with memmove. */ +void *memcpy(void *dest, const void *src, size_t n) +{ + if (dest > src && dest - src < n) { + warn("Avoiding potentially unsafe overlapping memcpy()!"); + return memmove(dest, src, n); + } + return __memcpy(dest, src, n); +} diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 34d047c98284..e24e0a0c90c9 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -70,5 +70,6 @@ SECTIONS _epgtable = . ; } #endif + . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */ _end = .; } diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 45a07684bbab..f0b8d6d93164 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -1,3 +1,7 @@ +/* + * Serial port routines for use during early boot reporting. This code is + * included from both the compressed kernel and the regular kernel. + */ #include "boot.h" #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6236b9ec4b76..3dd5be33aaa7 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -440,13 +440,116 @@ setup_data: .quad 0 # 64-bit physical pointer to pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr -#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) +# +# Getting to provably safe in-place decompression is hard. Worst case +# behaviours need to be analyzed. Here let's take the decompression of +# a gzip-compressed kernel as example, to illustrate it: +# +# The file layout of gzip compressed kernel is: +# +# magic[2] +# method[1] +# flags[1] +# timestamp[4] +# extraflags[1] +# os[1] +# compressed data blocks[N] +# crc[4] orig_len[4] +# +# ... resulting in +18 bytes overhead of uncompressed data. +# +# (For more information, please refer to RFC 1951 and RFC 1952.) +# +# Files divided into blocks +# 1 bit (last block flag) +# 2 bits (block type) +# +# 1 block occurs every 32K -1 bytes or when there 50% compression +# has been achieved. The smallest block type encoding is always used. +# +# stored: +# 32 bits length in bytes. +# +# fixed: +# magic fixed tree. +# symbols. +# +# dynamic: +# dynamic tree encoding. +# symbols. +# +# +# The buffer for decompression in place is the length of the uncompressed +# data, plus a small amount extra to keep the algorithm safe. The +# compressed data is placed at the end of the buffer. The output pointer +# is placed at the start of the buffer and the input pointer is placed +# where the compressed data starts. Problems will occur when the output +# pointer overruns the input pointer. +# +# The output pointer can only overrun the input pointer if the input +# pointer is moving faster than the output pointer. A condition only +# triggered by data whose compressed form is larger than the uncompressed +# form. +# +# The worst case at the block level is a growth of the compressed data +# of 5 bytes per 32767 bytes. +# +# The worst case internal to a compressed block is very hard to figure. +# The worst case can at least be bounded by having one bit that represents +# 32764 bytes and then all of the rest of the bytes representing the very +# very last byte. +# +# All of which is enough to compute an amount of extra data that is required +# to be safe. To avoid problems at the block level allocating 5 extra bytes +# per 32767 bytes of data is sufficient. To avoid problems internal to a +# block adding an extra 32767 bytes (the worst case uncompressed block size) +# is sufficient, to ensure that in the worst case the decompressed data for +# block will stop the byte before the compressed data for a block begins. +# To avoid problems with the compressed data's meta information an extra 18 +# bytes are needed. Leading to the formula: +# +# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 +# +# Adding 8 bytes per 32K is a bit excessive but much easier to calculate. +# Adding 32768 instead of 32767 just makes for round numbers. +# +# Above analysis is for decompressing gzip compressed kernel only. Up to +# now 6 different decompressor are supported all together. And among them +# xz stores data in chunks and has maximum chunk of 64K. Hence safety +# margin should be updated to cover all decompressors so that we don't +# need to deal with each of them separately. Please check +# the description in lib/decompressor_xxx.c for specific information. +# +# extra_bytes = (uncompressed_size >> 12) + 65536 + 128 + +#define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128) +#if ZO_z_output_len > ZO_z_input_len +# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ + ZO_z_input_len) +#else +# define ZO_z_extract_offset ZO_z_extra_bytes +#endif + +/* + * The extract_offset has to be bigger than ZO head section. Otherwise when + * the head code is running to move ZO to the end of the buffer, it will + * overwrite the head code itself. + */ +#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset +# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095) +#else +# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095) +#endif + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset) + #define VO_INIT_SIZE (VO__end - VO__text) #if ZO_INIT_SIZE > VO_INIT_SIZE -#define INIT_SIZE ZO_INIT_SIZE +# define INIT_SIZE ZO_INIT_SIZE #else -#define INIT_SIZE VO_INIT_SIZE +# define INIT_SIZE VO_INIT_SIZE #endif + init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config index f9affcc3b9f1..9906505c998a 100644 --- a/arch/x86/configs/kvm_guest.config +++ b/arch/x86/configs/kvm_guest.config @@ -26,3 +26,6 @@ CONFIG_VIRTIO_NET=y CONFIG_9P_FS=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y +CONFIG_SCSI_LOWLEVEL=y +CONFIG_SCSI_VIRTIO=y +CONFIG_VIRTIO_INPUT=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 4f404a64681b..0c8d7963483c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -173,6 +173,7 @@ CONFIG_TIGON3=y CONFIG_NET_TULIP=y CONFIG_E100=y CONFIG_E1000=y +CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 064c7e2bd7c8..5b7fa1471007 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1477,7 +1477,7 @@ static int __init aesni_init(void) } aesni_ctr_enc_tfm = aesni_ctr_enc; #ifdef CONFIG_AS_AVX - if (cpu_has_avx) { + if (boot_cpu_has(X86_FEATURE_AVX)) { /* optimize performance of ctr mode encryption transform */ aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; pr_info("AES CTR mode by8 optimization enabled\n"); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index d84456924563..60907c139c4e 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -562,7 +562,10 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 93d8f295784e..d96429da88eb 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -554,7 +554,9 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX or AES-NI instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 8baaff5af0b5..2d5c2e0bd939 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -129,7 +129,8 @@ static int __init chacha20_simd_mod_init(void) return -ENODEV; #ifdef CONFIG_AS_AVX2 - chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 && + chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif return crypto_register_alg(&alg); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 4264a3d59589..e32142bc071d 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -179,11 +179,12 @@ static struct shash_alg alg = { static int __init poly1305_simd_mod_init(void) { - if (!cpu_has_xmm2) + if (!boot_cpu_has(X86_FEATURE_XMM2)) return -ENODEV; #ifdef CONFIG_AS_AVX2 - poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 && + poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); alg.descsize = sizeof(struct poly1305_simd_desc_ctx); if (poly1305_use_avx2) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 6d198342e2de..870f6d812a2d 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -538,7 +538,7 @@ static int __init init(void) { const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_osxsave) { + if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 8943407e8917..644f97ab8cac 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -600,7 +600,7 @@ static struct crypto_alg serpent_algs[10] = { { static int __init serpent_sse2_init(void) { - if (!cpu_has_xmm2) { + if (!boot_cpu_has(X86_FEATURE_XMM2)) { printk(KERN_INFO "SSE2 instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 081255cea1ee..9c5af331a956 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -102,14 +102,14 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *st static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state); static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state); -inline void sha1_init_digest(uint32_t *digest) +static inline void sha1_init_digest(uint32_t *digest) { static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; memcpy(digest, initial_digest, sizeof(initial_digest)); } -inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], +static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint32_t total_len) { uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index dd14616b7739..1024e378a358 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -166,7 +166,7 @@ static struct shash_alg sha1_avx_alg = { static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (cpu_has_avx) + if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 5f4d6086dc59..3ae0f43ebd37 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -201,7 +201,7 @@ static struct shash_alg sha256_avx_algs[] = { { static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (cpu_has_avx) + if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 34e5083d6f36..0b17c83d027d 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -151,7 +151,7 @@ asmlinkage void sha512_transform_avx(u64 *digest, const char *data, static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (cpu_has_avx) + if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index e79d93d44ecd..ec138e538c44 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, long syscall_trace_enter(struct pt_regs *regs) { - u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); if (phase1_result == 0) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 10868aa734dc..983e5d3a0d27 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -207,10 +207,7 @@ ENTRY(ret_from_fork) pushl %eax call schedule_tail - GET_THREAD_INFO(%ebp) popl %eax - pushl $0x0202 # Reset kernel eflags - popfl /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax @@ -221,10 +218,7 @@ END(ret_from_fork) ENTRY(ret_from_kernel_thread) pushl %eax call schedule_tail - GET_THREAD_INFO(%ebp) popl %eax - pushl $0x0202 # Reset kernel eflags - popfl movl PT_EBP(%esp), %eax call *PT_EBX(%esp) movl $0, PT_EAX(%esp) @@ -251,7 +245,6 @@ ENDPROC(ret_from_kernel_thread) ret_from_exception: preempt_stop(CLBR_ANY) ret_from_intr: - GET_THREAD_INFO(%ebp) #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 858b555e274b..9ee0da1807ed 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -372,9 +372,6 @@ END(ptregs_\func) ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK, TI_flags(%r8) - pushq $0x0002 - popfq /* reset kernel eflags */ - call schedule_tail /* rdi: 'prev' task parameter */ testb $3, CS(%rsp) /* from kernel_thread? */ @@ -781,19 +778,25 @@ ENTRY(native_load_gs_index) pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS -gs_change: +.Lgs_change: movl %edi, %gs -2: mfence /* workaround */ +2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS popfq ret END(native_load_gs_index) - _ASM_EXTABLE(gs_change, bad_gs) + _ASM_EXTABLE(.Lgs_change, bad_gs) .section .fixup, "ax" /* running with kernelgs */ bad_gs: SWAPGS /* switch back to user gs */ +.macro ZAP_GS + /* This can't be a string because the preprocessor needs to see it. */ + movl $__USER_DS, %eax + movl %eax, %gs +.endm + ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG xorl %eax, %eax movl %eax, %gs jmp 2b @@ -1019,13 +1022,13 @@ ENTRY(error_entry) movl %ecx, %eax /* zero extend */ cmpq %rax, RIP+8(%rsp) je .Lbstep_iret - cmpq $gs_change, RIP+8(%rsp) + cmpq $.Lgs_change, RIP+8(%rsp) jne .Lerror_entry_done /* - * hack: gs_change can fail with user gsbase. If this happens, fix up + * hack: .Lgs_change can fail with user gsbase. If this happens, fix up * gsbase and proceed. We'll fix up the exception and land in - * gs_change's error handler with kernel gsbase. + * .Lgs_change's error handler with kernel gsbase. */ jmp .Lerror_entry_from_usermode_swapgs diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 847f2f0c31e5..e1721dafbcb1 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -72,24 +72,23 @@ ENTRY(entry_SYSENTER_compat) pushfq /* pt_regs->flags (except IF = 0) */ orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ pushq $__USER32_CS /* pt_regs->cs */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->ip = 0 (placeholder) */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ cld /* @@ -205,17 +204,16 @@ ENTRY(entry_SYSCALL_compat) pushq %rdx /* pt_regs->dx */ pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -316,11 +314,10 @@ ENTRY(entry_INT80_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp */ pushq %r12 /* pt_regs->r12 */ diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index b30dd8154cc2..4cddd17153fb 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -384,5 +384,5 @@ 375 i386 membarrier sys_membarrier 376 i386 mlock2 sys_mlock2 377 i386 copy_file_range sys_copy_file_range -378 i386 preadv2 sys_preadv2 -379 i386 pwritev2 sys_pwritev2 +378 i386 preadv2 sys_preadv2 compat_sys_preadv2 +379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index cac6d17ce5db..555263e385c9 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -374,3 +374,5 @@ 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat/ptregs +534 x32 preadv2 compat_sys_preadv2 +535 x32 pwritev2 compat_sys_pwritev2 diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 03c3eb77bfce..2f02d23a05ef 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -13,7 +13,6 @@ #include <uapi/linux/time.h> #include <asm/vgtod.h> -#include <asm/hpet.h> #include <asm/vvar.h> #include <asm/unistd.h> #include <asm/msr.h> @@ -28,16 +27,6 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_HPET_TIMER -extern u8 hpet_page - __attribute__((visibility("hidden"))); - -static notrace cycle_t vread_hpet(void) -{ - return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); -} -#endif - #ifdef CONFIG_PARAVIRT_CLOCK extern u8 pvclock_page __attribute__((visibility("hidden"))); @@ -195,10 +184,6 @@ notrace static inline u64 vgetsns(int *mode) if (gtod->vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); -#ifdef CONFIG_HPET_TIMER - else if (gtod->vclock_mode == VCLOCK_HPET) - cycles = vread_hpet(); -#endif #ifdef CONFIG_PARAVIRT_CLOCK else if (gtod->vclock_mode == VCLOCK_PVCLOCK) cycles = vread_pvclock(mode); diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 4158acc17df0..a708aa90b507 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -25,7 +25,7 @@ SECTIONS * segment. */ - vvar_start = . - 3 * PAGE_SIZE; + vvar_start = . - 2 * PAGE_SIZE; vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ @@ -35,8 +35,7 @@ SECTIONS #undef __VVAR_KERNEL_LDS #undef EMIT_VVAR - hpet_page = vvar_start + PAGE_SIZE; - pvclock_page = vvar_start + 2 * PAGE_SIZE; + pvclock_page = vvar_start + PAGE_SIZE; . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 10f704584922..b3cf81333a54 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -18,7 +18,6 @@ #include <asm/vdso.h> #include <asm/vvar.h> #include <asm/page.h> -#include <asm/hpet.h> #include <asm/desc.h> #include <asm/cpufeature.h> @@ -129,16 +128,6 @@ static int vvar_fault(const struct vm_special_mapping *sm, if (sym_offset == image->sym_vvar_page) { ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, __pa_symbol(&__vvar_page) >> PAGE_SHIFT); - } else if (sym_offset == image->sym_hpet_page) { -#ifdef CONFIG_HPET_TIMER - if (hpet_address && vclock_was_used(VCLOCK_HPET)) { - ret = vm_insert_pfn_prot( - vma, - (unsigned long)vmf->virtual_address, - hpet_address >> PAGE_SHIFT, - pgprot_noncached(PAGE_READONLY)); - } -#endif } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_pvti_cpu0_va(); diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig new file mode 100644 index 000000000000..98397db5ceae --- /dev/null +++ b/arch/x86/events/Kconfig @@ -0,0 +1,36 @@ +menu "Performance monitoring" + +config PERF_EVENTS_INTEL_UNCORE + tristate "Intel uncore performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + ---help--- + Include support for Intel uncore performance events. These are + available on NehalemEX and more modern processors. + +config PERF_EVENTS_INTEL_RAPL + tristate "Intel rapl performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + ---help--- + Include support for Intel rapl performance events for power + monitoring on modern processors. + +config PERF_EVENTS_INTEL_CSTATE + tristate "Intel cstate performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + ---help--- + Include support for Intel cstate performance events for power + monitoring on modern processors. + +config PERF_EVENTS_AMD_POWER + depends on PERF_EVENTS && CPU_SUP_AMD + tristate "AMD Processor Power Reporting Mechanism" + ---help--- + Provide power reporting mechanism support for AMD processors. + Currently, it leverages X86_FEATURE_ACC_POWER + (CPUID Fn8000_0007_EDX[12]) interface to calculate the + average power consumption on Family 15h processors. + +endmenu diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index f59618a39990..1d392c39fe56 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o ifdef CONFIG_AMD_IOMMU obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o endif -obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o + +obj-$(CONFIG_CPU_SUP_INTEL) += msr.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 3db9569e658c..98ac57381bf9 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -263,6 +263,7 @@ static const struct attribute_group *amd_uncore_attr_groups[] = { }; static struct pmu amd_nb_pmu = { + .task_ctx_nr = perf_invalid_context, .attr_groups = amd_uncore_attr_groups, .name = "amd_nb", .event_init = amd_uncore_event_init, @@ -274,6 +275,7 @@ static struct pmu amd_nb_pmu = { }; static struct pmu amd_l2_pmu = { + .task_ctx_nr = perf_invalid_context, .attr_groups = amd_uncore_attr_groups, .name = "amd_l2", .event_init = amd_uncore_event_init, diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 041e442a3e28..73a75aa5a66d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -360,6 +360,9 @@ int x86_add_exclusive(unsigned int what) { int i; + if (x86_pmu.lbr_pt_coexist) + return 0; + if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { @@ -380,6 +383,9 @@ fail_unlock: void x86_del_exclusive(unsigned int what) { + if (x86_pmu.lbr_pt_coexist) + return; + atomic_dec(&x86_pmu.lbr_exclusive[what]); atomic_dec(&active_events); } @@ -1518,7 +1524,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) static void __init pmu_check_apic(void) { - if (cpu_has_apic) + if (boot_cpu_has(X86_FEATURE_APIC)) return; x86_pmu.apic = 0; @@ -2177,7 +2183,7 @@ void arch_perf_update_userpage(struct perf_event *event, * cap_user_time_zero doesn't make sense when we're using a different * time base for the records. */ - if (event->clock == &local_clock) { + if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; userpg->time_zero = data->cyc2ns_offset; } @@ -2277,7 +2283,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) fp = compat_ptr(ss_base + regs->bp); pagefault_disable(); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { unsigned long bytes; frame.next_frame = 0; frame.return_address = 0; @@ -2337,7 +2343,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; pagefault_disable(); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { unsigned long bytes; frame.next_frame = NULL; frame.return_address = 0; diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile new file mode 100644 index 000000000000..3660b2cf245a --- /dev/null +++ b/arch/x86/events/intel/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o +intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o +intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o +obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o +intel-cstate-objs := cstate.o diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index b99dc9258c0f..0a6e393a2e62 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head) memset(page_address(phys->page) + index, 0, phys->size - index); } -static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts) -{ - if (buf->snapshot) - return false; - - if (local_read(&buf->data_size) >= bts->handle.size || - bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE) - return true; - - return false; -} - static void bts_update(struct bts_ctx *bts) { int cpu = raw_smp_processor_id(); @@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts) } } +static int +bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct bts_buffer *buf = perf_get_aux(&bts->handle); u64 config = 0; - if (!buf || bts_buffer_is_full(buf, bts)) - return; - - event->hw.itrace_started = 1; - event->hw.state = 0; - if (!buf->snapshot) config |= ARCH_PERFMON_EVENTSEL_INT; if (!event->attr.exclude_kernel) @@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event) wmb(); intel_pmu_enable_bts(config); + } static void bts_event_start(struct perf_event *event, int flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf; + + buf = perf_aux_output_begin(&bts->handle, event); + if (!buf) + goto fail_stop; + + if (bts_buffer_reset(buf, &bts->handle)) + goto fail_end_stop; + + bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; + bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; + bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; + + event->hw.itrace_started = 1; + event->hw.state = 0; __bts_event_start(event); /* PMI handler: this counter is running and likely generating PMIs */ ACCESS_ONCE(bts->started) = 1; + + return; + +fail_end_stop: + perf_aux_output_end(&bts->handle, 0, false); + +fail_stop: + event->hw.state = PERF_HES_STOPPED; } static void __bts_event_stop(struct perf_event *event) @@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event) static void bts_event_stop(struct perf_event *event, int flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf = perf_get_aux(&bts->handle); /* PMI handler: don't restart this counter */ ACCESS_ONCE(bts->started) = 0; __bts_event_stop(event); - if (flags & PERF_EF_UPDATE) + if (flags & PERF_EF_UPDATE) { bts_update(bts); + + if (buf) { + if (buf->snapshot) + bts->handle.head = + local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), + !!local_xchg(&buf->lost, 0)); + } + + cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; + cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; + } } void intel_bts_enable_local(void) @@ -417,34 +444,14 @@ int intel_bts_interrupt(void) static void bts_event_del(struct perf_event *event, int mode) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); - bts_event_stop(event, PERF_EF_UPDATE); - - if (buf) { - if (buf->snapshot) - bts->handle.head = - local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), - !!local_xchg(&buf->lost, 0)); - } - - cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; - cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; - cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; - cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; } static int bts_event_add(struct perf_event *event, int mode) { - struct bts_buffer *buf; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int ret = -EBUSY; event->hw.state = PERF_HES_STOPPED; @@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode) if (bts->handle.event) return -EBUSY; - buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return -EINVAL; - - ret = bts_buffer_reset(buf, &bts->handle); - if (ret) { - perf_aux_output_end(&bts->handle, 0, false); - return ret; - } - - bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; - bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; - bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; - if (mode & PERF_EF_START) { bts_event_start(event, 0); - if (hwc->state & PERF_HES_STOPPED) { - bts_event_del(event, 0); - return -EBUSY; - } + if (hwc->state & PERF_HES_STOPPED) + return -EINVAL; } return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a6fd4dbcf820..7c666958a625 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids }, }; +static struct extra_reg intel_glm_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1), + EVENT_EXTRA_END +}; + +#define GLM_DEMAND_DATA_RD BIT_ULL(0) +#define GLM_DEMAND_RFO BIT_ULL(1) +#define GLM_ANY_RESPONSE BIT_ULL(16) +#define GLM_SNP_NONE_OR_MISS BIT_ULL(33) +#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD +#define GLM_DEMAND_WRITE GLM_DEMAND_RFO +#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) +#define GLM_LLC_ACCESS GLM_ANY_RESPONSE +#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM) +#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM) + +static __initconst const u64 glm_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ + [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ + [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +static __initconst const u64 glm_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_READ| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_READ| + GLM_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_WRITE| + GLM_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH| + GLM_LLC_MISS, + }, + }, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -3447,7 +3581,7 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_atom(); + intel_pmu_lbr_init_slm(); x86_pmu.event_constraints = intel_slm_event_constraints; x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; @@ -3456,6 +3590,30 @@ __init int intel_pmu_init(void) pr_cont("Silvermont events, "); break; + case 92: /* 14nm Atom "Goldmont" */ + case 95: /* 14nm Atom "Goldmont Denverton" */ + memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; + x86_pmu.extra_regs = intel_glm_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + * :pp is identical to :ppp + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + pr_cont("Goldmont events, "); + break; + case 37: /* 32nm Westmere */ case 44: /* 32nm Westmere-EP */ case 47: /* 32nm Westmere-EX */ @@ -3708,7 +3866,7 @@ __init int intel_pmu_init(void) c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } c->idxmsk64 &= - ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); c->weight = hweight64(c->idxmsk64); } } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 7946c4231169..9ba4e4136a15 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -91,6 +91,8 @@ #include <asm/cpu_device_id.h> #include "../perf_event.h" +MODULE_LICENSE("GPL"); + #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ @@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf); +/* Model -> events mapping */ +struct cstate_model { + unsigned long core_events; + unsigned long pkg_events; + unsigned long quirks; +}; + +/* Quirk flags */ +#define SLM_PKG_C6_USE_C7_MSR (1UL << 0) + struct perf_cstate_msr { u64 msr; struct perf_pmu_events_attr *attr; - bool (*test)(int idx); }; /* cstate_core PMU */ - static struct pmu cstate_core_pmu; static bool has_cstate_core; -enum perf_cstate_core_id { - /* - * cstate_core events - */ +enum perf_cstate_core_events { PERF_CSTATE_CORE_C1_RES = 0, PERF_CSTATE_CORE_C3_RES, PERF_CSTATE_CORE_C6_RES, @@ -130,69 +137,16 @@ enum perf_cstate_core_id { PERF_CSTATE_CORE_EVENT_MAX, }; -bool test_core(int idx) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 != 6) - return false; - - switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - if (idx == PERF_CSTATE_CORE_C3_RES || - idx == PERF_CSTATE_CORE_C6_RES) - return true; - break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - if (idx == PERF_CSTATE_CORE_C3_RES || - idx == PERF_CSTATE_CORE_C6_RES || - idx == PERF_CSTATE_CORE_C7_RES) - return true; - break; - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ - if (idx == PERF_CSTATE_CORE_C1_RES || - idx == PERF_CSTATE_CORE_C6_RES) - return true; - break; - } - - return false; -} - PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); static struct perf_cstate_msr core_msr[] = { - [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, }, - [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, }, - [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, }, - [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, }, + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 }, }; static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { @@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = { NULL, }; -/* cstate_core PMU end */ - - /* cstate_pkg PMU */ - static struct pmu cstate_pkg_pmu; static bool has_cstate_pkg; -enum perf_cstate_pkg_id { - /* - * cstate_pkg events - */ +enum perf_cstate_pkg_events { PERF_CSTATE_PKG_C2_RES = 0, PERF_CSTATE_PKG_C3_RES, PERF_CSTATE_PKG_C6_RES, @@ -257,69 +204,6 @@ enum perf_cstate_pkg_id { PERF_CSTATE_PKG_EVENT_MAX, }; -bool test_pkg(int idx) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 != 6) - return false; - - switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - if (idx == PERF_CSTATE_CORE_C3_RES || - idx == PERF_CSTATE_CORE_C6_RES || - idx == PERF_CSTATE_CORE_C7_RES) - return true; - break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - if (idx == PERF_CSTATE_PKG_C2_RES || - idx == PERF_CSTATE_PKG_C3_RES || - idx == PERF_CSTATE_PKG_C6_RES || - idx == PERF_CSTATE_PKG_C7_RES) - return true; - break; - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ - if (idx == PERF_CSTATE_CORE_C6_RES) - return true; - break; - case 69: /* 22nm Haswell ULT */ - if (idx == PERF_CSTATE_PKG_C2_RES || - idx == PERF_CSTATE_PKG_C3_RES || - idx == PERF_CSTATE_PKG_C6_RES || - idx == PERF_CSTATE_PKG_C7_RES || - idx == PERF_CSTATE_PKG_C8_RES || - idx == PERF_CSTATE_PKG_C9_RES || - idx == PERF_CSTATE_PKG_C10_RES) - return true; - break; - } - - return false; -} - PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); @@ -329,13 +213,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); static struct perf_cstate_msr pkg_msr[] = { - [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, }, - [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, }, - [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, }, - [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, }, - [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, }, - [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, }, - [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, }, + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 }, }; static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { @@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = { NULL, }; -/* cstate_pkg PMU end*/ - static ssize_t cstate_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) @@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev, static int cstate_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config; - int ret = 0; + int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (event->cpu < 0) + return -EINVAL; + if (event->pmu == &cstate_core_pmu) { if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) return -EINVAL; if (!core_msr[cfg].attr) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; + cpu = cpumask_any_and(&cstate_core_cpu_mask, + topology_sibling_cpumask(event->cpu)); } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; - } else + cpu = cpumask_any_and(&cstate_pkg_cpu_mask, + topology_core_cpumask(event->cpu)); + } else { return -ENOENT; + } + + if (cpu >= nr_cpu_ids) + return -ENODEV; - /* must be done before validate_group */ + event->cpu = cpu; event->hw.config = cfg; event->hw.idx = -1; - - return ret; + return 0; } static inline u64 cstate_pmu_read_counter(struct perf_event *event) @@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) return 0; } +/* + * Check if exiting cpu is the designated reader. If so migrate the + * events when there is a valid target available + */ static void cstate_cpu_exit(int cpu) { - int i, id, target; + unsigned int target; - /* cpu exit for cstate core */ - if (has_cstate_core) { - id = topology_core_id(cpu); - target = -1; - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (id == topology_core_id(i)) { - target = i; - break; - } - } - if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0) + if (has_cstate_core && + cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { + + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_core_cpu_mask); - WARN_ON(cpumask_empty(&cstate_core_cpu_mask)); - if (target >= 0) perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); + } } - /* cpu exit for cstate pkg */ - if (has_cstate_pkg) { - id = topology_physical_package_id(cpu); - target = -1; - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (id == topology_physical_package_id(i)) { - target = i; - break; - } - } - if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0) + if (has_cstate_pkg && + cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { + + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_pkg_cpu_mask); - WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask)); - if (target >= 0) perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); + } } } static void cstate_cpu_init(int cpu) { - int i, id; + unsigned int target; - /* cpu init for cstate core */ - if (has_cstate_core) { - id = topology_core_id(cpu); - for_each_cpu(i, &cstate_core_cpu_mask) { - if (id == topology_core_id(i)) - break; - } - if (i >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_core_cpu_mask); - } + /* + * If this is the first online thread of that core, set it in + * the core cpu mask as the designated reader. + */ + target = cpumask_any_and(&cstate_core_cpu_mask, + topology_sibling_cpumask(cpu)); - /* cpu init for cstate pkg */ - if (has_cstate_pkg) { - id = topology_physical_package_id(cpu); - for_each_cpu(i, &cstate_pkg_cpu_mask) { - if (id == topology_physical_package_id(i)) - break; - } - if (i >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); - } + if (has_cstate_core && target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_core_cpu_mask); + + /* + * If this is the first online thread of that package, set it + * in the package cpu mask as the designated reader. + */ + target = cpumask_any_and(&cstate_pkg_cpu_mask, + topology_core_cpumask(cpu)); + if (has_cstate_pkg && target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); } static int cstate_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) + unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - break; case CPU_STARTING: cstate_cpu_init(cpu); break; - case CPU_UP_CANCELED: - case CPU_DYING: - break; - case CPU_ONLINE: - case CPU_DEAD: - break; case CPU_DOWN_PREPARE: cstate_cpu_exit(cpu); break; default: break; } - return NOTIFY_OK; } -/* - * Probe the cstate events and insert the available one into sysfs attrs - * Return false if there is no available events. - */ -static bool cstate_probe_msr(struct perf_cstate_msr *msr, - struct attribute **events_attrs, - int max_event_nr) -{ - int i, j = 0; - u64 val; - - /* Probe the cstate events. */ - for (i = 0; i < max_event_nr; i++) { - if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) - msr[i].attr = NULL; - } - - /* List remaining events in the sysfs attrs. */ - for (i = 0; i < max_event_nr; i++) { - if (msr[i].attr) - events_attrs[j++] = &msr[i].attr->attr.attr; - } - events_attrs[j] = NULL; - - return (j > 0) ? true : false; -} - -static int __init cstate_init(void) -{ - /* SLM has different MSR for PKG C6 */ - switch (boot_cpu_data.x86_model) { - case 55: - case 76: - case 77: - pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; - } - - if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX)) - has_cstate_core = true; - - if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX)) - has_cstate_pkg = true; - - return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; -} - -static void __init cstate_cpumask_init(void) -{ - int cpu; - - cpu_notifier_register_begin(); - - for_each_online_cpu(cpu) - cstate_cpu_init(cpu); - - __perf_cpu_notifier(cstate_cpu_notifier); - - cpu_notifier_register_done(); -} +static struct notifier_block cstate_cpu_nb = { + .notifier_call = cstate_cpu_notifier, + .priority = CPU_PRI_PERF + 1, +}; static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, .name = "cstate_core", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, - .add = cstate_pmu_event_add, /* must have */ - .del = cstate_pmu_event_del, /* must have */ + .add = cstate_pmu_event_add, + .del = cstate_pmu_event_del, .start = cstate_pmu_event_start, .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, @@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = { .name = "cstate_pkg", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, - .add = cstate_pmu_event_add, /* must have */ - .del = cstate_pmu_event_del, /* must have */ + .add = cstate_pmu_event_add, + .del = cstate_pmu_event_del, .start = cstate_pmu_event_start, .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; -static void __init cstate_pmus_register(void) +static const struct cstate_model nhm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES), +}; + +static const struct cstate_model snb_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES), +}; + +static const struct cstate_model hswult_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model slm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = SLM_PKG_C6_USE_C7_MSR, +}; + +#define X86_CSTATES_MODEL(model, states) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } + +static const struct x86_cpu_id intel_cstates_match[] __initconst = { + X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ + X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ + X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + + X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ + X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ + X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + + X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ + X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + + X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ + X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + + X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ + X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ + X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + + X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + + X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ + X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ + X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + + X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ + X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ + X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ + X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + + X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ + X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + { }, +}; +MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); + +/* + * Probe the cstate events and insert the available one into sysfs attrs + * Return false if there are no available events. + */ +static bool __init cstate_probe_msr(const unsigned long evmsk, int max, + struct perf_cstate_msr *msr, + struct attribute **attrs) { - int err; + bool found = false; + unsigned int bit; + u64 val; + + for (bit = 0; bit < max; bit++) { + if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) { + *attrs++ = &msr[bit].attr->attr.attr; + found = true; + } else { + msr[bit].attr = NULL; + } + } + *attrs = NULL; + + return found; +} + +static int __init cstate_probe(const struct cstate_model *cm) +{ + /* SLM has different MSR for PKG C6 */ + if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) + pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, + PERF_CSTATE_CORE_EVENT_MAX, + core_msr, core_events_attrs); + + has_cstate_pkg = cstate_probe_msr(cm->pkg_events, + PERF_CSTATE_PKG_EVENT_MAX, + pkg_msr, pkg_events_attrs); + + return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; +} + +static inline void cstate_cleanup(void) +{ + if (has_cstate_core) + perf_pmu_unregister(&cstate_core_pmu); + + if (has_cstate_pkg) + perf_pmu_unregister(&cstate_pkg_pmu); +} + +static int __init cstate_init(void) +{ + int cpu, err; + + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) + cstate_cpu_init(cpu); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); - if (WARN_ON(err)) - pr_info("Failed to register PMU %s error %d\n", - cstate_core_pmu.name, err); + if (err) { + has_cstate_core = false; + pr_info("Failed to register cstate core pmu\n"); + goto out; + } } if (has_cstate_pkg) { err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1); - if (WARN_ON(err)) - pr_info("Failed to register PMU %s error %d\n", - cstate_pkg_pmu.name, err); + if (err) { + has_cstate_pkg = false; + pr_info("Failed to register cstate pkg pmu\n"); + cstate_cleanup(); + goto out; + } } + __register_cpu_notifier(&cstate_cpu_nb); +out: + cpu_notifier_register_done(); + return err; } static int __init cstate_pmu_init(void) { + const struct x86_cpu_id *id; int err; - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(intel_cstates_match); + if (!id) return -ENODEV; - err = cstate_init(); + err = cstate_probe((const struct cstate_model *) id->driver_data); if (err) return err; - cstate_cpumask_init(); - - cstate_pmus_register(); - - return 0; + return cstate_init(); } +module_init(cstate_pmu_init); -device_initcall(cstate_pmu_init); +static void __exit cstate_pmu_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&cstate_cpu_nb); + cstate_cleanup(); + cpu_notifier_register_done(); +} +module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8584b90d8e0b..7ce9f3f669e6 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_glm_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), + EVENT_CONSTRAINT_END +}; + struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 1ca5d1e7d4f2..9e2b40cdb05f 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -14,7 +14,8 @@ enum { LBR_FORMAT_EIP_FLAGS = 0x03, LBR_FORMAT_EIP_FLAGS2 = 0x04, LBR_FORMAT_INFO = 0x05, - LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO, + LBR_FORMAT_TIME = 0x06, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, }; static enum { @@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) abort = !!(info & LBR_INFO_ABORT); cycles = (info & LBR_INFO_CYCLES); } + + if (lbr_format == LBR_FORMAT_TIME) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + skip = 1; + cycles = ((to >> 48) & LBR_INFO_CYCLES); + + to = (u64)((((s64)to) << 16) >> 16); + } + if (lbr_flags & LBR_EIP_FLAGS) { mis = !!(from & LBR_FROM_FLAG_MISPRED); pred = !mis; @@ -1049,6 +1060,24 @@ void __init intel_pmu_lbr_init_atom(void) pr_cont("8-deep LBR, "); } +/* slm */ +void __init intel_pmu_lbr_init_slm(void) +{ + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("8-deep LBR, "); +} + /* Knights Landing */ void intel_pmu_lbr_init_knl(void) { diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 09a77dbc73c9..04bb5fb5a8d7 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -67,11 +67,13 @@ static struct pt_cap_desc { PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff), PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)), PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)), + PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)), PT_CAP(mtc, 0, CR_EBX, BIT(3)), PT_CAP(topa_output, 0, CR_ECX, BIT(0)), PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), PT_CAP(single_range_output, 0, CR_ECX, BIT(2)), PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)), + PT_CAP(num_address_ranges, 1, CR_EAX, 0x3), PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000), PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff), PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000), @@ -125,9 +127,46 @@ static struct attribute_group pt_format_group = { .attrs = pt_formats_attr, }; +static ssize_t +pt_timing_attr_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + switch (pmu_attr->id) { + case 0: + return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio); + case 1: + return sprintf(page, "%u:%u\n", + pt_pmu.tsc_art_num, + pt_pmu.tsc_art_den); + default: + break; + } + + return -EINVAL; +} + +PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0, + pt_timing_attr_show); +PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1, + pt_timing_attr_show); + +static struct attribute *pt_timing_attr[] = { + &timing_attr_max_nonturbo_ratio.attr.attr, + &timing_attr_tsc_art_ratio.attr.attr, + NULL, +}; + +static struct attribute_group pt_timing_group = { + .attrs = pt_timing_attr, +}; + static const struct attribute_group *pt_attr_groups[] = { &pt_cap_group, &pt_format_group, + &pt_timing_group, NULL, }; @@ -140,6 +179,23 @@ static int __init pt_pmu_hw_init(void) int ret; long i; + rdmsrl(MSR_PLATFORM_INFO, reg); + pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8; + + /* + * if available, read in TSC to core crystal clock ratio, + * otherwise, zero for numerator stands for "not enumerated" + * as per SDM + */ + if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) { + u32 eax, ebx, ecx, edx; + + cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx); + + pt_pmu.tsc_art_num = ebx; + pt_pmu.tsc_art_den = eax; + } + if (boot_cpu_has(X86_FEATURE_VMX)) { /* * Intel SDM, 36.5 "Tracing post-VMXON" says that @@ -263,6 +319,75 @@ static bool pt_event_valid(struct perf_event *event) * These all are cpu affine and operate on a local PT */ +/* Address ranges and their corresponding msr configuration registers */ +static const struct pt_address_range { + unsigned long msr_a; + unsigned long msr_b; + unsigned int reg_off; +} pt_address_ranges[] = { + { + .msr_a = MSR_IA32_RTIT_ADDR0_A, + .msr_b = MSR_IA32_RTIT_ADDR0_B, + .reg_off = RTIT_CTL_ADDR0_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR1_A, + .msr_b = MSR_IA32_RTIT_ADDR1_B, + .reg_off = RTIT_CTL_ADDR1_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR2_A, + .msr_b = MSR_IA32_RTIT_ADDR2_B, + .reg_off = RTIT_CTL_ADDR2_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR3_A, + .msr_b = MSR_IA32_RTIT_ADDR3_B, + .reg_off = RTIT_CTL_ADDR3_OFFSET, + } +}; + +static u64 pt_config_filters(struct perf_event *event) +{ + struct pt_filters *filters = event->hw.addr_filters; + struct pt *pt = this_cpu_ptr(&pt_ctx); + unsigned int range = 0; + u64 rtit_ctl = 0; + + if (!filters) + return 0; + + perf_event_addr_filters_sync(event); + + for (range = 0; range < filters->nr_filters; range++) { + struct pt_filter *filter = &filters->filter[range]; + + /* + * Note, if the range has zero start/end addresses due + * to its dynamic object not being loaded yet, we just + * go ahead and program zeroed range, which will simply + * produce no data. Note^2: if executable code at 0x0 + * is a concern, we can set up an "invalid" configuration + * such as msr_b < msr_a. + */ + + /* avoid redundant msr writes */ + if (pt->filters.filter[range].msr_a != filter->msr_a) { + wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a); + pt->filters.filter[range].msr_a = filter->msr_a; + } + + if (pt->filters.filter[range].msr_b != filter->msr_b) { + wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b); + pt->filters.filter[range].msr_b = filter->msr_b; + } + + rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + } + + return rtit_ctl; +} + static void pt_config(struct perf_event *event) { u64 reg; @@ -272,7 +397,8 @@ static void pt_config(struct perf_event *event) wrmsrl(MSR_IA32_RTIT_STATUS, 0); } - reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; + reg = pt_config_filters(event); + reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; if (!event->attr.exclude_kernel) reg |= RTIT_CTL_OS; @@ -709,6 +835,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, /* clear STOP and INT from current entry */ buf->topa_index[buf->stop_pos]->stop = 0; + buf->topa_index[buf->stop_pos]->intr = 0; buf->topa_index[buf->intr_pos]->intr = 0; /* how many pages till the STOP marker */ @@ -733,6 +860,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, buf->intr_pos = idx; buf->topa_index[buf->stop_pos]->stop = 1; + buf->topa_index[buf->stop_pos]->intr = 1; buf->topa_index[buf->intr_pos]->intr = 1; return 0; @@ -919,24 +1047,80 @@ static void pt_buffer_free_aux(void *data) kfree(buf); } -/** - * pt_buffer_is_full() - check if the buffer is full - * @buf: PT buffer. - * @pt: Per-cpu pt handle. - * - * If the user hasn't read data from the output region that aux_head - * points to, the buffer is considered full: the user needs to read at - * least this region and update aux_tail to point past it. - */ -static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt) +static int pt_addr_filters_init(struct perf_event *event) { - if (buf->snapshot) - return false; + struct pt_filters *filters; + int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); + + if (!pt_cap_get(PT_CAP_num_address_ranges)) + return 0; + + filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node); + if (!filters) + return -ENOMEM; + + if (event->parent) + memcpy(filters, event->parent->hw.addr_filters, + sizeof(*filters)); + + event->hw.addr_filters = filters; + + return 0; +} + +static void pt_addr_filters_fini(struct perf_event *event) +{ + kfree(event->hw.addr_filters); + event->hw.addr_filters = NULL; +} + +static int pt_event_addr_filters_validate(struct list_head *filters) +{ + struct perf_addr_filter *filter; + int range = 0; + + list_for_each_entry(filter, filters, entry) { + /* PT doesn't support single address triggers */ + if (!filter->range) + return -EOPNOTSUPP; + + if (!filter->inode && !kernel_ip(filter->offset)) + return -EINVAL; + + if (++range > pt_cap_get(PT_CAP_num_address_ranges)) + return -EOPNOTSUPP; + } + + return 0; +} + +static void pt_event_addr_filters_sync(struct perf_event *event) +{ + struct perf_addr_filters_head *head = perf_event_addr_filters(event); + unsigned long msr_a, msr_b, *offs = event->addr_filters_offs; + struct pt_filters *filters = event->hw.addr_filters; + struct perf_addr_filter *filter; + int range = 0; + + if (!filters) + return; - if (local_read(&buf->data_size) >= pt->handle.size) - return true; + list_for_each_entry(filter, &head->list, entry) { + if (filter->inode && !offs[range]) { + msr_a = msr_b = 0; + } else { + /* apply the offset */ + msr_a = filter->offset + offs[range]; + msr_b = filter->size + msr_a; + } + + filters->filter[range].msr_a = msr_a; + filters->filter[range].msr_b = msr_b; + filters->filter[range].config = filter->filter ? 1 : 2; + range++; + } - return false; + filters->nr_filters = range; } /** @@ -953,7 +1137,7 @@ void intel_pt_interrupt(void) * after PT has been disabled by pt_event_stop(). Make sure we don't * do anything (particularly, re-enable) for this event here. */ - if (!ACCESS_ONCE(pt->handle_nmi)) + if (!READ_ONCE(pt->handle_nmi)) return; /* @@ -1038,23 +1222,36 @@ EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); static void pt_event_start(struct perf_event *event, int mode) { + struct hw_perf_event *hwc = &event->hw; struct pt *pt = this_cpu_ptr(&pt_ctx); - struct pt_buffer *buf = perf_get_aux(&pt->handle); + struct pt_buffer *buf; if (READ_ONCE(pt->vmx_on)) return; - if (!buf || pt_buffer_is_full(buf, pt)) { - event->hw.state = PERF_HES_STOPPED; - return; + buf = perf_aux_output_begin(&pt->handle, event); + if (!buf) + goto fail_stop; + + pt_buffer_reset_offsets(buf, pt->handle.head); + if (!buf->snapshot) { + if (pt_buffer_reset_markers(buf, &pt->handle)) + goto fail_end_stop; } - ACCESS_ONCE(pt->handle_nmi) = 1; - event->hw.state = 0; + WRITE_ONCE(pt->handle_nmi, 1); + hwc->state = 0; pt_config_buffer(buf->cur->table, buf->cur_idx, buf->output_off); pt_config(event); + + return; + +fail_end_stop: + perf_aux_output_end(&pt->handle, 0, true); +fail_stop: + hwc->state = PERF_HES_STOPPED; } static void pt_event_stop(struct perf_event *event, int mode) @@ -1065,7 +1262,7 @@ static void pt_event_stop(struct perf_event *event, int mode) * Protect against the PMI racing with disabling wrmsr, * see comment in intel_pt_interrupt(). */ - ACCESS_ONCE(pt->handle_nmi) = 0; + WRITE_ONCE(pt->handle_nmi, 0); pt_config_stop(event); @@ -1088,19 +1285,7 @@ static void pt_event_stop(struct perf_event *event, int mode) pt_handle_status(pt); pt_update_head(pt); - } -} - -static void pt_event_del(struct perf_event *event, int mode) -{ - struct pt *pt = this_cpu_ptr(&pt_ctx); - struct pt_buffer *buf; - pt_event_stop(event, PERF_EF_UPDATE); - - buf = perf_get_aux(&pt->handle); - - if (buf) { if (buf->snapshot) pt->handle.head = local_xchg(&buf->data_size, @@ -1110,9 +1295,13 @@ static void pt_event_del(struct perf_event *event, int mode) } } +static void pt_event_del(struct perf_event *event, int mode) +{ + pt_event_stop(event, PERF_EF_UPDATE); +} + static int pt_event_add(struct perf_event *event, int mode) { - struct pt_buffer *buf; struct pt *pt = this_cpu_ptr(&pt_ctx); struct hw_perf_event *hwc = &event->hw; int ret = -EBUSY; @@ -1120,34 +1309,18 @@ static int pt_event_add(struct perf_event *event, int mode) if (pt->handle.event) goto fail; - buf = perf_aux_output_begin(&pt->handle, event); - ret = -EINVAL; - if (!buf) - goto fail_stop; - - pt_buffer_reset_offsets(buf, pt->handle.head); - if (!buf->snapshot) { - ret = pt_buffer_reset_markers(buf, &pt->handle); - if (ret) - goto fail_end_stop; - } - if (mode & PERF_EF_START) { pt_event_start(event, 0); - ret = -EBUSY; + ret = -EINVAL; if (hwc->state == PERF_HES_STOPPED) - goto fail_end_stop; + goto fail; } else { hwc->state = PERF_HES_STOPPED; } - return 0; - -fail_end_stop: - perf_aux_output_end(&pt->handle, 0, true); -fail_stop: - hwc->state = PERF_HES_STOPPED; + ret = 0; fail: + return ret; } @@ -1157,6 +1330,7 @@ static void pt_event_read(struct perf_event *event) static void pt_event_destroy(struct perf_event *event) { + pt_addr_filters_fini(event); x86_del_exclusive(x86_lbr_exclusive_pt); } @@ -1171,6 +1345,11 @@ static int pt_event_init(struct perf_event *event) if (x86_add_exclusive(x86_lbr_exclusive_pt)) return -EBUSY; + if (pt_addr_filters_init(event)) { + x86_del_exclusive(x86_lbr_exclusive_pt); + return -ENOMEM; + } + event->destroy = pt_event_destroy; return 0; @@ -1190,7 +1369,7 @@ static __init int pt_init(void) BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE); - if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT)) + if (!boot_cpu_has(X86_FEATURE_INTEL_PT)) return -ENODEV; get_online_cpus(); @@ -1224,16 +1403,21 @@ static __init int pt_init(void) PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF; pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; - pt_pmu.pmu.attr_groups = pt_attr_groups; - pt_pmu.pmu.task_ctx_nr = perf_sw_context; - pt_pmu.pmu.event_init = pt_event_init; - pt_pmu.pmu.add = pt_event_add; - pt_pmu.pmu.del = pt_event_del; - pt_pmu.pmu.start = pt_event_start; - pt_pmu.pmu.stop = pt_event_stop; - pt_pmu.pmu.read = pt_event_read; - pt_pmu.pmu.setup_aux = pt_buffer_setup_aux; - pt_pmu.pmu.free_aux = pt_buffer_free_aux; + pt_pmu.pmu.attr_groups = pt_attr_groups; + pt_pmu.pmu.task_ctx_nr = perf_sw_context; + pt_pmu.pmu.event_init = pt_event_init; + pt_pmu.pmu.add = pt_event_add; + pt_pmu.pmu.del = pt_event_del; + pt_pmu.pmu.start = pt_event_start; + pt_pmu.pmu.stop = pt_event_stop; + pt_pmu.pmu.read = pt_event_read; + pt_pmu.pmu.setup_aux = pt_buffer_setup_aux; + pt_pmu.pmu.free_aux = pt_buffer_free_aux; + pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync; + pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate; + pt_pmu.pmu.nr_addr_filters = + pt_cap_get(PT_CAP_num_address_ranges); + ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1); return ret; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 3abb5f5cccc8..efffa4a09f68 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -20,6 +20,40 @@ #define __INTEL_PT_H__ /* + * PT MSR bit definitions + */ +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_CYCLEACC BIT(1) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_MTC_EN BIT(9) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_MTC_RANGE_OFFSET 14 +#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) +#define RTIT_CTL_CYC_THRESH_OFFSET 19 +#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) +#define RTIT_CTL_PSB_FREQ_OFFSET 24 +#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) +#define RTIT_CTL_ADDR0_OFFSET 32 +#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET) +#define RTIT_CTL_ADDR1_OFFSET 36 +#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET) +#define RTIT_CTL_ADDR2_OFFSET 40 +#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET) +#define RTIT_CTL_ADDR3_OFFSET 44 +#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET) +#define RTIT_STATUS_FILTEREN BIT(0) +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_BUFFOVF BIT(3) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) + +/* * Single-entry ToPA: when this close to region boundary, switch * buffers to avoid losing data. */ @@ -48,15 +82,20 @@ struct topa_entry { #define PT_CPUID_LEAVES 2 #define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */ +/* TSC to Core Crystal Clock Ratio */ +#define CPUID_TSC_LEAF 0x15 + enum pt_capabilities { PT_CAP_max_subleaf = 0, PT_CAP_cr3_filtering, PT_CAP_psb_cyc, + PT_CAP_ip_filtering, PT_CAP_mtc, PT_CAP_topa_output, PT_CAP_topa_multiple_entries, PT_CAP_single_range_output, PT_CAP_payloads_lip, + PT_CAP_num_address_ranges, PT_CAP_mtc_periods, PT_CAP_cycle_thresholds, PT_CAP_psb_periods, @@ -66,6 +105,9 @@ struct pt_pmu { struct pmu pmu; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; bool vmx; + unsigned long max_nonturbo_ratio; + unsigned int tsc_art_num; + unsigned int tsc_art_den; }; /** @@ -104,14 +146,40 @@ struct pt_buffer { struct topa_entry *topa_index[0]; }; +#define PT_FILTERS_NUM 4 + +/** + * struct pt_filter - IP range filter configuration + * @msr_a: range start, goes to RTIT_ADDRn_A + * @msr_b: range end, goes to RTIT_ADDRn_B + * @config: 4-bit field in RTIT_CTL + */ +struct pt_filter { + unsigned long msr_a; + unsigned long msr_b; + unsigned long config; +}; + +/** + * struct pt_filters - IP range filtering context + * @filter: filters defined for this context + * @nr_filters: number of defined filters in the @filter array + */ +struct pt_filters { + struct pt_filter filter[PT_FILTERS_NUM]; + unsigned int nr_filters; +}; + /** * struct pt - per-cpu pt context * @handle: perf output handle + * @filters: last configured filters * @handle_nmi: do handle PT PMI on this cpu, there's an active event * @vmx_on: 1 if VMX is ON on this cpu */ struct pt { struct perf_output_handle handle; + struct pt_filters filters; int handle_nmi; int vmx_on; }; diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 1705c9d75e44..99c4bab123cd 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -27,10 +27,14 @@ * event: rapl_energy_dram * perf code: 0x3 * - * dram counter: consumption of the builtin-gpu domain (client only) + * gpu counter: consumption of the builtin-gpu domain (client only) * event: rapl_energy_gpu * perf code: 0x4 * + * psys counter: consumption of the builtin-psys domain (client only) + * event: rapl_energy_psys + * perf code: 0x5 + * * We manage those counters as free running (read-only). They may be * use simultaneously by other tools, such as turbostat. * @@ -53,6 +57,8 @@ #include <asm/cpu_device_id.h> #include "../perf_event.h" +MODULE_LICENSE("GPL"); + /* * RAPL energy status counters */ @@ -64,13 +70,16 @@ #define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ +#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */ +#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */ -#define NR_RAPL_DOMAINS 0x4 +#define NR_RAPL_DOMAINS 0x5 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "pp0-core", "package", "dram", "pp1-gpu", + "psys", }; /* Clients have PP0, PKG */ @@ -89,6 +98,13 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { 1<<RAPL_IDX_RAM_NRG_STAT|\ 1<<RAPL_IDX_PP1_NRG_STAT) +/* SKL clients have PP0, PKG, RAM, PP1, PSYS */ +#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ + 1<<RAPL_IDX_PKG_NRG_STAT|\ + 1<<RAPL_IDX_RAM_NRG_STAT|\ + 1<<RAPL_IDX_PP1_NRG_STAT|\ + 1<<RAPL_IDX_PSYS_NRG_STAT) + /* Knights Landing has PKG, RAM */ #define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\ 1<<RAPL_IDX_RAM_NRG_STAT) @@ -360,6 +376,10 @@ static int rapl_pmu_event_init(struct perf_event *event) bit = RAPL_IDX_PP1_NRG_STAT; msr = MSR_PP1_ENERGY_STATUS; break; + case INTEL_RAPL_PSYS: + bit = RAPL_IDX_PSYS_NRG_STAT; + msr = MSR_PLATFORM_ENERGY_STATUS; + break; default: return -EINVAL; } @@ -414,11 +434,13 @@ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); +RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); /* * we compute in 0.23 nJ increments regardless of MSR @@ -427,6 +449,7 @@ RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); static struct attribute *rapl_events_srv_attr[] = { EVENT_PTR(rapl_cores), @@ -476,6 +499,27 @@ static struct attribute *rapl_events_hsw_attr[] = { NULL, }; +static struct attribute *rapl_events_skl_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + EVENT_PTR(rapl_gpu), + EVENT_PTR(rapl_ram), + EVENT_PTR(rapl_psys), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + EVENT_PTR(rapl_gpu_unit), + EVENT_PTR(rapl_ram_unit), + EVENT_PTR(rapl_psys_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + EVENT_PTR(rapl_gpu_scale), + EVENT_PTR(rapl_ram_scale), + EVENT_PTR(rapl_psys_scale), + NULL, +}; + static struct attribute *rapl_events_knl_attr[] = { EVENT_PTR(rapl_pkg), EVENT_PTR(rapl_ram), @@ -592,6 +636,11 @@ static int rapl_cpu_notifier(struct notifier_block *self, return NOTIFY_OK; } +static struct notifier_block rapl_cpu_nb = { + .notifier_call = rapl_cpu_notifier, + .priority = CPU_PRI_PERF + 1, +}; + static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -660,7 +709,7 @@ static int __init rapl_prepare_cpus(void) return 0; } -static void __init cleanup_rapl_pmus(void) +static void cleanup_rapl_pmus(void) { int i; @@ -691,52 +740,92 @@ static int __init init_rapl_pmus(void) return 0; } +#define X86_RAPL_MODEL_MATCH(model, init) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } + +struct intel_rapl_init_fun { + bool apply_quirk; + int cntr_mask; + struct attribute **attrs; +}; + +static const struct intel_rapl_init_fun snb_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_CLN, + .attrs = rapl_events_cln_attr, +}; + +static const struct intel_rapl_init_fun hsx_rapl_init __initconst = { + .apply_quirk = true, + .cntr_mask = RAPL_IDX_SRV, + .attrs = rapl_events_srv_attr, +}; + +static const struct intel_rapl_init_fun hsw_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_HSW, + .attrs = rapl_events_hsw_attr, +}; + +static const struct intel_rapl_init_fun snbep_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_SRV, + .attrs = rapl_events_srv_attr, +}; + +static const struct intel_rapl_init_fun knl_rapl_init __initconst = { + .apply_quirk = true, + .cntr_mask = RAPL_IDX_KNL, + .attrs = rapl_events_knl_attr, +}; + +static const struct intel_rapl_init_fun skl_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_SKL_CLN, + .attrs = rapl_events_skl_attr, +}; + static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, - [1] = {}, + X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ + X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + + X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ + X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + + X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ + X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ + X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ + X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + + X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ + X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ + X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ + X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + + X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + + X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ + X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + {}, }; +MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match); + static int __init rapl_pmu_init(void) { - bool apply_quirk = false; + const struct x86_cpu_id *id; + struct intel_rapl_init_fun *rapl_init; + bool apply_quirk; int ret; - if (!x86_match_cpu(rapl_cpu_match)) + id = x86_match_cpu(rapl_cpu_match); + if (!id) return -ENODEV; - switch (boot_cpu_data.x86_model) { - case 42: /* Sandy Bridge */ - case 58: /* Ivy Bridge */ - rapl_cntr_mask = RAPL_IDX_CLN; - rapl_pmu_events_group.attrs = rapl_events_cln_attr; - break; - case 63: /* Haswell-Server */ - case 79: /* Broadwell-Server */ - apply_quirk = true; - rapl_cntr_mask = RAPL_IDX_SRV; - rapl_pmu_events_group.attrs = rapl_events_srv_attr; - break; - case 60: /* Haswell */ - case 69: /* Haswell-Celeron */ - case 70: /* Haswell GT3e */ - case 61: /* Broadwell */ - case 71: /* Broadwell-H */ - rapl_cntr_mask = RAPL_IDX_HSW; - rapl_pmu_events_group.attrs = rapl_events_hsw_attr; - break; - case 45: /* Sandy Bridge-EP */ - case 62: /* IvyTown */ - rapl_cntr_mask = RAPL_IDX_SRV; - rapl_pmu_events_group.attrs = rapl_events_srv_attr; - break; - case 87: /* Knights Landing */ - apply_quirk = true; - rapl_cntr_mask = RAPL_IDX_KNL; - rapl_pmu_events_group.attrs = rapl_events_knl_attr; - break; - default: - return -ENODEV; - } + rapl_init = (struct intel_rapl_init_fun *)id->driver_data; + apply_quirk = rapl_init->apply_quirk; + rapl_cntr_mask = rapl_init->cntr_mask; + rapl_pmu_events_group.attrs = rapl_init->attrs; ret = rapl_check_hw_unit(apply_quirk); if (ret) @@ -756,7 +845,7 @@ static int __init rapl_pmu_init(void) if (ret) goto out; - __perf_cpu_notifier(rapl_cpu_notifier); + __register_cpu_notifier(&rapl_cpu_nb); cpu_notifier_register_done(); rapl_advertise(); return 0; @@ -767,4 +856,14 @@ out: cpu_notifier_register_done(); return ret; } -device_initcall(rapl_pmu_init); +module_init(rapl_pmu_init); + +static void __exit intel_rapl_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&rapl_cpu_nb); + perf_pmu_unregister(&rapl_pmus->pmu); + cleanup_rapl_pmus(); + cpu_notifier_register_done(); +} +module_exit(intel_rapl_exit); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7012d18bb293..16c178916412 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,3 +1,4 @@ +#include <asm/cpu_device_id.h> #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +MODULE_LICENSE("GPL"); + static int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; @@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) +static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) { struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; @@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) } } -static void __init uncore_exit_boxes(void *dummy) +static void uncore_exit_boxes(void *dummy) { struct intel_uncore_type **types; @@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu) kfree(pmu->boxes); } -static void __init uncore_type_exit(struct intel_uncore_type *type) +static void uncore_type_exit(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmu = type->pmus; int i; @@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) type->events_group = NULL; } -static void __init uncore_types_exit(struct intel_uncore_type **types) +static void uncore_types_exit(struct intel_uncore_type **types) { for (; *types; types++) uncore_type_exit(*types); @@ -989,46 +992,6 @@ static int __init uncore_pci_init(void) size_t size; int ret; - switch (boot_cpu_data.x86_model) { - case 45: /* Sandy Bridge-EP */ - ret = snbep_uncore_pci_init(); - break; - case 62: /* Ivy Bridge-EP */ - ret = ivbep_uncore_pci_init(); - break; - case 63: /* Haswell-EP */ - ret = hswep_uncore_pci_init(); - break; - case 79: /* BDX-EP */ - case 86: /* BDX-DE */ - ret = bdx_uncore_pci_init(); - break; - case 42: /* Sandy Bridge */ - ret = snb_uncore_pci_init(); - break; - case 58: /* Ivy Bridge */ - ret = ivb_uncore_pci_init(); - break; - case 60: /* Haswell */ - case 69: /* Haswell Celeron */ - ret = hsw_uncore_pci_init(); - break; - case 61: /* Broadwell */ - ret = bdw_uncore_pci_init(); - break; - case 87: /* Knights Landing */ - ret = knl_uncore_pci_init(); - break; - case 94: /* SkyLake */ - ret = skl_uncore_pci_init(); - break; - default: - return -ENODEV; - } - - if (ret) - return ret; - size = max_packages * sizeof(struct pci_extra_dev); uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); if (!uncore_extra_pci_dev) { @@ -1060,7 +1023,7 @@ err: return ret; } -static void __init uncore_pci_exit(void) +static void uncore_pci_exit(void) { if (pcidrv_registered) { pcidrv_registered = false; @@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void) { int ret; - switch (boot_cpu_data.x86_model) { - case 26: /* Nehalem */ - case 30: - case 37: /* Westmere */ - case 44: - nhm_uncore_cpu_init(); - break; - case 42: /* Sandy Bridge */ - case 58: /* Ivy Bridge */ - case 60: /* Haswell */ - case 69: /* Haswell */ - case 70: /* Haswell */ - case 61: /* Broadwell */ - case 71: /* Broadwell */ - snb_uncore_cpu_init(); - break; - case 45: /* Sandy Bridge-EP */ - snbep_uncore_cpu_init(); - break; - case 46: /* Nehalem-EX */ - case 47: /* Westmere-EX aka. Xeon E7 */ - nhmex_uncore_cpu_init(); - break; - case 62: /* Ivy Bridge-EP */ - ivbep_uncore_cpu_init(); - break; - case 63: /* Haswell-EP */ - hswep_uncore_cpu_init(); - break; - case 79: /* BDX-EP */ - case 86: /* BDX-DE */ - bdx_uncore_cpu_init(); - break; - case 87: /* Knights Landing */ - knl_uncore_cpu_init(); - break; - default: - return -ENODEV; - } - ret = uncore_types_init(uncore_msr_uncores, true); if (ret) goto err; @@ -1376,20 +1299,123 @@ static int __init uncore_cpumask_init(bool msr) return 0; } +#define X86_UNCORE_MODEL_MATCH(model, init) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } + +struct intel_uncore_init_fun { + void (*cpu_init)(void); + int (*pci_init)(void); +}; + +static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { + .cpu_init = nhm_uncore_cpu_init, +}; + +static const struct intel_uncore_init_fun snb_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = snb_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = ivb_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = hsw_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = bdw_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { + .cpu_init = snbep_uncore_cpu_init, + .pci_init = snbep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { + .cpu_init = nhmex_uncore_cpu_init, +}; + +static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { + .cpu_init = ivbep_uncore_cpu_init, + .pci_init = ivbep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { + .cpu_init = hswep_uncore_cpu_init, + .pci_init = hswep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { + .cpu_init = bdx_uncore_cpu_init, + .pci_init = bdx_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun knl_uncore_init __initconst = { + .cpu_init = knl_uncore_cpu_init, + .pci_init = knl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .pci_init = skl_uncore_pci_init, +}; + +static const struct x86_cpu_id intel_uncore_match[] __initconst = { + X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ + X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ + X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ + X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ + X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ + X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ + X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ + X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ + X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ + X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ + X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ + X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ + X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ + X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ + X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ + X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ + X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ + X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + {}, +}; + +MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); + static int __init intel_uncore_init(void) { - int pret, cret, ret; + const struct x86_cpu_id *id; + struct intel_uncore_init_fun *uncore_init; + int pret = 0, cret = 0, ret; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + id = x86_match_cpu(intel_uncore_match); + if (!id) return -ENODEV; - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; max_packages = topology_max_packages(); - pret = uncore_pci_init(); - cret = uncore_cpu_init(); + uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + if (uncore_init->pci_init) { + pret = uncore_init->pci_init(); + if (!pret) + pret = uncore_pci_init(); + } + + if (uncore_init->cpu_init) { + uncore_init->cpu_init(); + cret = uncore_cpu_init(); + } if (cret && pret) return -ENODEV; @@ -1409,4 +1435,14 @@ err: cpu_notifier_register_done(); return ret; } -device_initcall(intel_uncore_init); +module_init(intel_uncore_init); + +static void __exit intel_uncore_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&uncore_cpu_nb); + uncore_types_exit(uncore_msr_uncores); + uncore_pci_exit(); + cpu_notifier_register_done(); +} +module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index ab2bcaaebe38..b2625867ebd1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -219,6 +219,9 @@ #define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff #define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18) #define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32) +#define KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE (0x1ULL << 32) +#define KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE (0x1ULL << 33) +#define KNL_CHA_MSR_PMON_BOX_FILTER_NNC (0x1ULL << 37) /* KNL EDC/MC UCLK */ #define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400 @@ -1902,6 +1905,10 @@ static int knl_cha_hw_config(struct intel_uncore_box *box, reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx; reg1->config = event->attr.config1 & knl_cha_filter_mask(idx); + + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE; + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE; + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_NNC; reg1->idx = idx; } return 0; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ec863b9a9f78..85ef3c2e80e0 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -6,6 +6,8 @@ enum perf_msr_id { PERF_MSR_MPERF = 2, PERF_MSR_PPERF = 3, PERF_MSR_SMI = 4, + PERF_MSR_PTSC = 5, + PERF_MSR_IRPERF = 6, PERF_MSR_EVENT_MAX, }; @@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx) return boot_cpu_has(X86_FEATURE_APERFMPERF); } +static bool test_ptsc(int idx) +{ + return boot_cpu_has(X86_FEATURE_PTSC); +} + +static bool test_irperf(int idx) +{ + return boot_cpu_has(X86_FEATURE_IRPERF); +} + static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || @@ -69,18 +81,22 @@ struct perf_msr { bool (*test)(int idx); }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01"); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); +PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); +PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01"); +PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); +PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); +PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); +PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05"); +PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06"); static struct perf_msr msr[] = { - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, + [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, }; static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { @@ -166,7 +182,7 @@ again: if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) delta = sign_extend64(delta, 31); - local64_add(now - prev, &event->count); + local64_add(delta, &event->count); } static void msr_event_start(struct perf_event *event, int flags) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ad4dc7ffffb5..8bd764df815d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -601,6 +601,7 @@ struct x86_pmu { u64 lbr_sel_mask; /* LBR_SELECT valid bits */ const int *lbr_sel_map; /* lbr_select mappings */ bool lbr_double_abort; /* duplicated lbr aborts */ + bool lbr_pt_coexist; /* LBR may coexist with PT */ /* * Intel PT/LBR/BTS are exclusive @@ -859,6 +860,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[]; extern struct event_constraint intel_slm_pebs_event_constraints[]; +extern struct event_constraint intel_glm_pebs_event_constraints[]; + extern struct event_constraint intel_nehalem_pebs_event_constraints[]; extern struct event_constraint intel_westmere_pebs_event_constraints[]; @@ -907,6 +910,8 @@ void intel_pmu_lbr_init_nhm(void); void intel_pmu_lbr_init_atom(void); +void intel_pmu_lbr_init_slm(void); + void intel_pmu_lbr_init_snb(void); void intel_pmu_lbr_init_hsw(void); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 0552884da18d..2f29f4e407c3 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -357,7 +357,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); /* Create the ucontext. */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 99afb665a004..e77a6443104f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -1,11 +1,12 @@ #ifndef _ASM_X86_ALTERNATIVE_H #define _ASM_X86_ALTERNATIVE_H +#ifndef __ASSEMBLY__ + #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> #include <asm/asm.h> -#include <asm/ptrace.h> /* * Alternative inline assembly for SMP. @@ -233,36 +234,6 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr -struct paravirt_patch_site; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end); -#else -static inline void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - -extern void *text_poke_early(void *addr, const void *opcode, size_t len); - -/* - * Clear and restore the kernel write-protection flag on the local CPU. - * Allows the kernel to edit read-only pages. - * Side-effect: any interrupt handler running between save and restore will have - * the ability to write to read-only pages. - * - * Warning: - * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and - * no thread can be preempted in the instructions being modified (no iret to an - * invalid instruction possible) or if the instructions are changed from a - * consistent state to another consistent state atomically. - * On the local CPU you need to be protected again NMI or MCE handlers seeing an - * inconsistent instruction while you patch. - */ -extern void *text_poke(void *addr, const void *opcode, size_t len); -extern int poke_int3_handler(struct pt_regs *regs); -extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 98f25bbafac4..bc27611fa58f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -239,10 +239,10 @@ extern void __init check_x2apic(void); extern void x2apic_setup(void); static inline int x2apic_enabled(void) { - return cpu_has_x2apic && apic_is_x2apic_enabled(); + return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled(); } -#define x2apic_supported() (cpu_has_x2apic) +#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC)) #else /* !CONFIG_X86_X2APIC */ static inline void check_x2apic(void) { } static inline void x2apic_setup(void) { } diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h index aa6a3170ab5a..2b00c776f223 100644 --- a/arch/x86/include/asm/bios_ebda.h +++ b/arch/x86/include/asm/bios_ebda.h @@ -17,27 +17,6 @@ static inline unsigned int get_bios_ebda(void) return address; /* 0 means none */ } -/* - * Return the sanitized length of the EBDA in bytes, if it exists. - */ -static inline unsigned int get_bios_ebda_length(void) -{ - unsigned int address; - unsigned int length; - - address = get_bios_ebda(); - if (!address) - return 0; - - /* EBDA length is byte 0 of the EBDA (stored in KiB) */ - length = *(unsigned char *)phys_to_virt(address); - length <<= 10; - - /* Trim the length if it extends beyond 640KiB */ - length = min_t(unsigned int, (640 * 1024) - address, length); - return length; -} - void reserve_ebda_region(void); #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6b8d6e8cd449..abd06b19ddd2 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -12,29 +12,46 @@ /* Minimum kernel alignment, as a power of two */ #ifdef CONFIG_X86_64 -#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +# define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT #else -#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER) +# define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER) #endif #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) -#error "Invalid value for CONFIG_PHYSICAL_ALIGN" +# error "Invalid value for CONFIG_PHYSICAL_ALIGN" #endif #ifdef CONFIG_KERNEL_BZIP2 -#define BOOT_HEAP_SIZE 0x400000 +# define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ - -#define BOOT_HEAP_SIZE 0x10000 - -#endif /* !CONFIG_KERNEL_BZIP2 */ +# define BOOT_HEAP_SIZE 0x10000 +#endif #ifdef CONFIG_X86_64 -#define BOOT_STACK_SIZE 0x4000 -#else -#define BOOT_STACK_SIZE 0x1000 +# define BOOT_STACK_SIZE 0x4000 + +# define BOOT_INIT_PGT_SIZE (6*4096) +# ifdef CONFIG_RANDOMIZE_BASE +/* + * Assuming all cross the 512GB boundary: + * 1 page for level4 + * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel + * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP). + * Total is 19 pages. + */ +# ifdef CONFIG_X86_VERBOSE_BOOTUP +# define BOOT_PGT_SIZE (19*4096) +# else /* !CONFIG_X86_VERBOSE_BOOTUP */ +# define BOOT_PGT_SIZE (17*4096) +# endif +# else /* !CONFIG_RANDOMIZE_BASE */ +# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE +# endif + +#else /* !CONFIG_X86_64 */ +# define BOOT_STACK_SIZE 0x1000 #endif #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index d194266acb28..eae33c7170c8 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,11 +3,10 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. */ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ -#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ -#define VCLOCK_MAX 3 +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ +#define VCLOCK_MAX 2 struct arch_clocksource_data { int vclock_mode; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index ebb102e1bbc7..5a3b2c119ed0 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *)round_down(sp - len, 16); } -static inline bool is_x32_task(void) +static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) @@ -318,7 +318,7 @@ static inline bool is_x32_task(void) static inline bool in_compat_syscall(void) { - return is_ia32_task() || is_x32_task(); + return in_ia32_syscall() || in_x32_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3636ec06c887..25ebb54905e0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -27,6 +27,7 @@ enum cpuid_leafs CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, + CPUID_8000_0007_EBX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -118,31 +119,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) -#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) -#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) -#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) -#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) -#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) -#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) -#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) -#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) -#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) -#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) -#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) -#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) -#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) -#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) -#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) -#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) -/* - * Do not add any more of those clumsy macros - use static_cpu_has() for - * fast paths and boot_cpu_has() otherwise! - */ - #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8f9afefd2dc5..4a413485f9eb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -12,7 +12,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 17 /* N 32-bit words worth of info */ +#define NCAPINTS 18 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -177,6 +177,7 @@ #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ @@ -250,6 +251,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -280,6 +282,11 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + /* * BUG word(s) */ @@ -294,6 +301,9 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ + #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 53748c45e488..78d1e7467eae 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include <asm/fpu/api.h> #include <asm/pgtable.h> +#include <asm/processor-flags.h> #include <asm/tlb.h> /* @@ -28,33 +29,22 @@ #define MAX_CMDLINE_ADDRESS UINT_MAX -#ifdef CONFIG_X86_32 +#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF +#ifdef CONFIG_X86_32 extern unsigned long asmlinkage efi_call_phys(void *, ...); +#define arch_efi_call_virt_setup() kernel_fpu_begin() +#define arch_efi_call_virt_teardown() kernel_fpu_end() + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. */ - -/* Use this macro if your virtual returns a non-void value */ -#define efi_call_virt(f, args...) \ +#define arch_efi_call_virt(f, args...) \ ({ \ - efi_status_t __s; \ - kernel_fpu_begin(); \ - __s = ((efi_##f##_t __attribute__((regparm(0)))*) \ - efi.systab->runtime->f)(args); \ - kernel_fpu_end(); \ - __s; \ -}) - -/* Use this macro if your virtual call does not return any value */ -#define __efi_call_virt(f, args...) \ -({ \ - kernel_fpu_begin(); \ ((efi_##f##_t __attribute__((regparm(0)))*) \ efi.systab->runtime->f)(args); \ - kernel_fpu_end(); \ }) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -78,10 +68,8 @@ struct efi_scratch { u64 phys_stack; } __packed; -#define efi_call_virt(f, ...) \ +#define arch_efi_call_virt_setup() \ ({ \ - efi_status_t __s; \ - \ efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ @@ -91,9 +79,13 @@ struct efi_scratch { write_cr3((unsigned long)efi_scratch.efi_pgt); \ __flush_tlb_all(); \ } \ - \ - __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ - \ +}) + +#define arch_efi_call_virt(f, args...) \ + efi_call((void *)efi.systab->runtime->f, args) \ + +#define arch_efi_call_virt_teardown() \ +({ \ if (efi_scratch.use_pgd) { \ write_cr3(efi_scratch.prev_cr3); \ __flush_tlb_all(); \ @@ -101,15 +93,8 @@ struct efi_scratch { \ __kernel_fpu_end(); \ preempt_enable(); \ - __s; \ }) -/* - * All X86_64 virt calls return non-void values. Thus, use non-void call for - * virt calls that would be void on X86_32. - */ -#define __efi_call_virt(f, args...) efi_call_virt(f, args) - extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); @@ -180,6 +165,8 @@ static inline bool efi_runtime_supported(void) extern struct console early_efi_console; extern void parse_efi_setup(u64 phys_addr, u32 data_len); +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); + #ifdef CONFIG_EFI_MIXED extern void efi_thunk_runtime_setup(void); extern efi_status_t efi_thunk_set_virtual_address_map( @@ -225,6 +212,11 @@ __pure const struct efi_config *__efi_early(void); #define efi_call_early(f, ...) \ __efi_early()->call(__efi_early()->f, __VA_ARGS__); +#define __efi_call_early(f, ...) \ + __efi_early()->call((unsigned long)f, __VA_ARGS__); + +#define efi_is_64bit() __efi_early()->is64 + extern bool efi_reboot_required(void); #else diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 15340e36ddcb..fea7724141a0 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -176,7 +176,7 @@ static inline void elf_common_init(struct thread_struct *t, regs->si = regs->di = regs->bp = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; - t->fs = t->gs = 0; + t->fsbase = t->gsbase = 0; t->fsindex = t->gsindex = 0; t->ds = t->es = ds; } @@ -226,8 +226,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - (pr_reg)[21] = current->thread.fs; \ - (pr_reg)[22] = current->thread.gs; \ + (pr_reg)[21] = current->thread.fsbase; \ + (pr_reg)[22] = current->thread.gsbase; \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index e6a8613fbfb0..3a106165e03a 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -4,7 +4,7 @@ #include <asm/page.h> #include <asm-generic/hugetlb.h> -#define hugepages_supported() cpu_has_pse +#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index d0afb05c84fc..f70604125286 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -5,7 +5,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return cpu_has_apic; + return boot_cpu_has(X86_FEATURE_APIC); } #endif /* _ASM_IRQ_WORK_H */ diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index 332f98c9111f..22a8537eb780 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -6,6 +6,8 @@ * Copyright (C) 2008 Wind River Systems, Inc. */ +#include <asm/ptrace.h> + /* * BUFMAX defines the maximum number of characters in inbound/outbound * buffers at least NUMREGBYTES*2 are needed for register packets diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 79327e9483a3..0ccb26dda126 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,40 +8,6 @@ #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) - -/* - * Make sure the compiler doesn't do anything stupid with the - * arguments on the stack - they are owned by the *caller*, not - * the callee. This just fools gcc into not spilling into them, - * and keeps it from doing tailcall recursion and/or using the - * stack slots for temporaries, since they are live and "used" - * all the way to the end of the function. - * - * NOTE! On x86-64, all the arguments are in registers, so this - * only matters on a 32-bit kernel. - */ -#define asmlinkage_protect(n, ret, args...) \ - __asmlinkage_protect##n(ret, ##args) -#define __asmlinkage_protect_n(ret, args...) \ - __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) -#define __asmlinkage_protect0(ret) \ - __asmlinkage_protect_n(ret) -#define __asmlinkage_protect1(ret, arg1) \ - __asmlinkage_protect_n(ret, "m" (arg1)) -#define __asmlinkage_protect2(ret, arg1, arg2) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) -#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) -#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ - "m" (arg4)) -#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ - "m" (arg4), "m" (arg5)) -#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ - "m" (arg4), "m" (arg5), "m" (arg6)) - #endif /* CONFIG_X86_32 */ #ifdef __ASSEMBLY__ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 92b6f651fa4f..8bf766ef0e18 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -104,13 +104,23 @@ #define MCE_LOG_SIGNATURE "MACHINECHECK" /* AMD Scalable MCA */ +#define MSR_AMD64_SMCA_MC0_CTL 0xc0002000 +#define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001 +#define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002 #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 +#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) /* @@ -168,9 +178,18 @@ struct mce_vendor_flags { __reserved_0 : 61; }; + +struct mca_msr_regs { + u32 (*ctl) (int bank); + u32 (*status) (int bank); + u32 (*addr) (int bank); + u32 (*misc) (int bank); +}; + extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; +extern struct mca_msr_regs msr_ops; extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 84280029cafd..396348196aa7 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -115,103 +115,12 @@ static inline void destroy_context(struct mm_struct *mm) destroy_context_ldt(mm); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned cpu = smp_processor_id(); +extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); - if (likely(prev != next)) { -#ifdef CONFIG_SMP - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - this_cpu_write(cpu_tlbstate.active_mm, next); -#endif - cpumask_set_cpu(cpu, mm_cpumask(next)); - - /* - * Re-load page tables. - * - * This logic has an ordering constraint: - * - * CPU 0: Write to a PTE for 'next' - * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. - * CPU 1: set bit 1 in next's mm_cpumask - * CPU 1: load from the PTE that CPU 0 writes (implicit) - * - * We need to prevent an outcome in which CPU 1 observes - * the new PTE value and CPU 0 observes bit 1 clear in - * mm_cpumask. (If that occurs, then the IPI will never - * be sent, and CPU 0's TLB will contain a stale entry.) - * - * The bad outcome can occur if either CPU's load is - * reordered before that CPU's store, so both CPUs must - * execute full barriers to prevent this from happening. - * - * Thus, switch_mm needs a full barrier between the - * store to mm_cpumask and any operation that could load - * from next->pgd. TLB fills are special and can happen - * due to instruction fetches or for no reason at all, - * and neither LOCK nor MFENCE orders them. - * Fortunately, load_cr3() is serializing and gives the - * ordering guarantee we need. - * - */ - load_cr3(next->pgd); - - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - - /* Stop flush ipis for the previous mm */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - - /* Load per-mm CR4 state */ - load_mm_cr4(next); - -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT, if the LDT is different. - * - * It's possible that prev->context.ldt doesn't match - * the LDT register. This can happen if leave_mm(prev) - * was called and then modify_ldt changed - * prev->context.ldt but suppressed an IPI to this CPU. - * In this case, prev->context.ldt != NULL, because we - * never set context.ldt to NULL while the mm still - * exists. That means that next->context.ldt != - * prev->context.ldt, because mms never share an LDT. - */ - if (unlikely(prev->context.ldt != next->context.ldt)) - load_mm_ldt(next); -#endif - } -#ifdef CONFIG_SMP - else { - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); - - if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { - /* - * On established mms, the mm_cpumask is only changed - * from irq context, from ptep_clear_flush() while in - * lazy tlb mode, and here. Irqs are blocked during - * schedule, protecting us from simultaneous changes. - */ - cpumask_set_cpu(cpu, mm_cpumask(next)); - - /* - * We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload CR3 - * to make sure to use no freed page tables. - * - * As above, load_cr3() is serializing and orders TLB - * fills with respect to the mm_cpumask write. - */ - load_cr3(next->pgd); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - load_mm_cr4(next); - load_mm_ldt(next); - } - } -#endif -} +extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); +#define switch_mm_irqs_off switch_mm_irqs_off #define activate_mm(prev, next) \ do { \ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5b3c9a55f51c..5a73a9c62c39 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -89,27 +89,16 @@ #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_IA32_RTIT_CTL 0x00000570 -#define RTIT_CTL_TRACEEN BIT(0) -#define RTIT_CTL_CYCLEACC BIT(1) -#define RTIT_CTL_OS BIT(2) -#define RTIT_CTL_USR BIT(3) -#define RTIT_CTL_CR3EN BIT(7) -#define RTIT_CTL_TOPA BIT(8) -#define RTIT_CTL_MTC_EN BIT(9) -#define RTIT_CTL_TSC_EN BIT(10) -#define RTIT_CTL_DISRETC BIT(11) -#define RTIT_CTL_BRANCH_EN BIT(13) -#define RTIT_CTL_MTC_RANGE_OFFSET 14 -#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) -#define RTIT_CTL_CYC_THRESH_OFFSET 19 -#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) -#define RTIT_CTL_PSB_FREQ_OFFSET 24 -#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) #define MSR_IA32_RTIT_STATUS 0x00000571 -#define RTIT_STATUS_CONTEXTEN BIT(1) -#define RTIT_STATUS_TRIGGEREN BIT(2) -#define RTIT_STATUS_ERROR BIT(4) -#define RTIT_STATUS_STOPPED BIT(5) +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 #define MSR_IA32_RTIT_CR3_MATCH 0x00000572 #define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 #define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 @@ -205,6 +194,8 @@ #define MSR_CONFIG_TDP_CONTROL 0x0000064B #define MSR_TURBO_ACTIVATION_RATIO 0x0000064C +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D + #define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 #define MSR_PKG_ANY_CORE_C0_RES 0x00000659 #define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A @@ -315,6 +306,9 @@ #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 @@ -328,6 +322,7 @@ #define MSR_F15H_PERF_CTR 0xc0010201 #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_PTSC 0xc0010280 #define MSR_F15H_IC_CFG 0xc0011021 /* Fam 10h MSRs */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7a79ee2778b3..7dc1d8fef7fd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -84,7 +84,10 @@ static inline unsigned long long native_read_msr(unsigned int msr) { DECLARE_ARGS(val, low, high); - asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + asm volatile("1: rdmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + : EAX_EDX_RET(val, low, high) : "c" (msr)); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); @@ -98,7 +101,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, asm volatile("2: rdmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err] ; jmp 1b\n\t" + "3: mov %[fault],%[err]\n\t" + "xorl %%eax, %%eax\n\t" + "xorl %%edx, %%edx\n\t" + "jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) @@ -108,10 +114,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } -static inline void native_write_msr(unsigned int msr, - unsigned low, unsigned high) +/* Can be uninlined because referenced by paravirt */ +notrace static inline void native_write_msr(unsigned int msr, + unsigned low, unsigned high) { - asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + : : "c" (msr), "a"(low), "d" (high) : "memory"); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index b94f6f64e23d..dbff1456d215 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,6 +24,7 @@ #define _ASM_X86_MTRR_H #include <uapi/asm/mtrr.h> +#include <asm/pat.h> /* @@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } +static inline void mtrr_bp_init(void) +{ + pat_disable("MTRRs disabled, skipping PAT initialization too."); +} #define mtrr_ap_init() do {} while (0) -#define mtrr_bp_init() do {} while (0) #define set_mtrr_aps_delayed_init() do {} while (0) #define mtrr_aps_init() do {} while (0) #define mtrr_bp_restore() do {} while (0) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 802dde30c928..cf8f619b305f 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -37,7 +37,10 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +#ifndef __pa #define __pa(x) __phys_addr((unsigned long)(x)) +#endif + #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) /* __pa_symbol should be used for C visible symbols. This seems to be the official gcc blessed way to do such arithmetic. */ @@ -51,7 +54,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, #define __pa_symbol(x) \ __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) +#ifndef __va #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#endif #define __boot_va(x) __va(x) #define __boot_pa(x) __pa(x) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 4928cf0d5af0..d5c2f8b40faa 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -47,12 +47,10 @@ * are fully set up. If kernel ASLR is configured, it can extend the * kernel page table mapping, reducing the size of the modules area. */ -#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) -#if defined(CONFIG_RANDOMIZE_BASE) && \ - CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT -#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET +#if defined(CONFIG_RANDOMIZE_BASE) +#define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024) #else -#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) #endif #endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 601f1b8f9961..2970d22d7766 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -15,17 +15,6 @@ #include <linux/cpumask.h> #include <asm/frame.h> -static inline int paravirt_enabled(void) -{ - return pv_info.paravirt_enabled; -} - -static inline int paravirt_has_feature(unsigned int feature) -{ - WARN_ON_ONCE(!pv_info.paravirt_enabled); - return (pv_info.features & feature); -} - static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -130,21 +119,31 @@ static inline void wbinvd(void) #define get_kernel_rpl() (pv_info.kernel_rpl) -static inline u64 paravirt_read_msr(unsigned msr, int *err) +static inline u64 paravirt_read_msr(unsigned msr) +{ + return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr); +} + +static inline void paravirt_write_msr(unsigned msr, + unsigned low, unsigned high) +{ + return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); +} + +static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { - return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); + return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err); } -static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) +static inline int paravirt_write_msr_safe(unsigned msr, + unsigned low, unsigned high) { - return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); + return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high); } -/* These should all do BUG_ON(_err), but our headers are too tangled. */ #define rdmsr(msr, val1, val2) \ do { \ - int _err; \ - u64 _l = paravirt_read_msr(msr, &_err); \ + u64 _l = paravirt_read_msr(msr); \ val1 = (u32)_l; \ val2 = _l >> 32; \ } while (0) @@ -156,8 +155,7 @@ do { \ #define rdmsrl(msr, val) \ do { \ - int _err; \ - val = paravirt_read_msr(msr, &_err); \ + val = paravirt_read_msr(msr); \ } while (0) static inline void wrmsrl(unsigned msr, u64 val) @@ -165,23 +163,23 @@ static inline void wrmsrl(unsigned msr, u64 val) wrmsr(msr, (u32)val, (u32)(val>>32)); } -#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) +#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) /* rdmsr with exception handling */ -#define rdmsr_safe(msr, a, b) \ -({ \ - int _err; \ - u64 _l = paravirt_read_msr(msr, &_err); \ - (*a) = (u32)_l; \ - (*b) = _l >> 32; \ - _err; \ +#define rdmsr_safe(msr, a, b) \ +({ \ + int _err; \ + u64 _l = paravirt_read_msr_safe(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; \ }) static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; - *p = paravirt_read_msr(msr, &err); + *p = paravirt_read_msr_safe(msr, &err); return err; } diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index e8c2326478c8..7fa9e7740ba3 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -69,15 +69,9 @@ struct pv_info { u16 extra_user_64bit_cs; /* __USER_CS if none */ #endif - int paravirt_enabled; - unsigned int features; /* valid only if paravirt_enabled is set */ const char *name; }; -#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x) -/* Supported features */ -#define PV_SUPPORTED_RTC (1<<0) - struct pv_init_ops { /* * Patch may replace one of the defined code sequences with @@ -155,10 +149,16 @@ struct pv_cpu_ops { void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); - /* MSR, PMC and TSR operations. - err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ - u64 (*read_msr)(unsigned int msr, int *err); - int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + /* Unsafe MSR operations. These will warn or panic on failure. */ + u64 (*read_msr)(unsigned int msr); + void (*write_msr)(unsigned int msr, unsigned low, unsigned high); + + /* + * Safe MSR operations. + * read sets err to 0 or -EIO. write returns 0 or -EIO. + */ + u64 (*read_msr_safe)(unsigned int msr, int *err); + int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); u64 (*read_pmc)(int counter); diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index ca6c228d5e62..0b1ff4c1c14e 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -5,8 +5,8 @@ #include <asm/pgtable_types.h> bool pat_enabled(void); +void pat_disable(const char *reason); extern void pat_init(void); -void pat_init_cache_modes(u64); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 97f3242e133c..f86491a7bc9d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -183,7 +183,7 @@ static inline int pmd_trans_huge(pmd_t pmd) static inline int has_transparent_hugepage(void) { - return cpu_has_pse; + return boot_cpu_has(X86_FEATURE_PSE); } #ifdef __HAVE_ARCH_PTE_DEVMAP diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9264476f3d57..62c6cc3cc5d3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -388,9 +388,16 @@ struct thread_struct { unsigned long ip; #endif #ifdef CONFIG_X86_64 - unsigned long fs; + unsigned long fsbase; + unsigned long gsbase; +#else + /* + * XXX: this could presumably be unsigned short. Alternatively, + * 32-bit kernels could be taught to use fsindex instead. + */ + unsigned long fs; + unsigned long gs; #endif - unsigned long gs; /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; @@ -473,8 +480,6 @@ static inline unsigned long current_top_of_stack(void) #include <asm/paravirt.h> #else #define __cpuid native_cpuid -#define paravirt_enabled() 0 -#define paravirt_has(x) 0 static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index ceec86eb68e9..453744c1d347 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -99,26 +99,36 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +#define ____down_write(sem, slow_path) \ +({ \ + long tmp; \ + struct rw_semaphore* ret; \ + asm volatile("# beginning down_write\n\t" \ + LOCK_PREFIX " xadd %1,(%3)\n\t" \ + /* adds 0xffff0001, returns the old value */ \ + " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ + /* was the active mask 0 before? */\ + " jz 1f\n" \ + " call " slow_path "\n" \ + "1:\n" \ + "# ending down_write" \ + : "+m" (sem->count), "=d" (tmp), "=a" (ret) \ + : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ + : "memory", "cc"); \ + ret; \ +}) + +static inline void __down_write(struct rw_semaphore *sem) { - long tmp; - asm volatile("# beginning down_write\n\t" - LOCK_PREFIX " xadd %1,(%2)\n\t" - /* adds 0xffff0001, returns the old value */ - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" - /* was the active mask 0 before? */ - " jz 1f\n" - " call call_rwsem_down_write_failed\n" - "1:\n" - "# ending down_write" - : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); + ____down_write(sem, "call_rwsem_down_write_failed"); } -static inline void __down_write(struct rw_semaphore *sem) +static inline int __down_write_killable(struct rw_semaphore *sem) { - __down_write_nested(sem, 0); + if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable"))) + return -EINTR; + + return 0; } /* diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 7d5a1929d76b..1549caa098f0 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -2,6 +2,7 @@ #define _ASM_X86_SEGMENT_H #include <linux/const.h> +#include <asm/alternative.h> /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -207,13 +208,6 @@ #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) -/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */ -#define FS_TLS 0 -#define GS_TLS 1 - -#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) -#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) - #endif #ifndef CONFIG_PARAVIRT @@ -249,10 +243,13 @@ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDL #endif /* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. + * Load a segment. Fall back on loading the zero segment if something goes + * wrong. This variant assumes that loading zero fully clears the segment. + * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any + * failure to fully clear the cached descriptor is only observable for + * FS and GS. */ -#define loadsegment(seg, value) \ +#define __loadsegment_simple(seg, value) \ do { \ unsigned short __val = (value); \ \ @@ -269,6 +266,38 @@ do { \ : "+r" (__val) : : "memory"); \ } while (0) +#define __loadsegment_ss(value) __loadsegment_simple(ss, (value)) +#define __loadsegment_ds(value) __loadsegment_simple(ds, (value)) +#define __loadsegment_es(value) __loadsegment_simple(es, (value)) + +#ifdef CONFIG_X86_32 + +/* + * On 32-bit systems, the hidden parts of FS and GS are unobservable if + * the selector is NULL, so there's no funny business here. + */ +#define __loadsegment_fs(value) __loadsegment_simple(fs, (value)) +#define __loadsegment_gs(value) __loadsegment_simple(gs, (value)) + +#else + +static inline void __loadsegment_fs(unsigned short value) +{ + asm volatile(" \n" + "1: movw %0, %%fs \n" + "2: \n" + + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + + : : "rm" (value) : "memory"); +} + +/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. */ + +#endif + +#define loadsegment(seg, value) __loadsegment_ ## seg (value) + /* * Save a segment register away: */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 11af24e09c8a..ac1d5da14734 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -6,6 +6,7 @@ #define COMMAND_LINE_SIZE 2048 #include <linux/linkage.h> +#include <asm/page_types.h> #ifdef __i386__ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 751bf4b7bf11..8f321a1b03a1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -39,8 +39,7 @@ do { \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ \ - asm volatile("pushfl\n\t" /* save flags */ \ - "pushl %%ebp\n\t" /* save EBP */ \ + asm volatile("pushl %%ebp\n\t" /* save EBP */ \ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ @@ -49,7 +48,6 @@ do { \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore EBP */ \ - "popfl\n" /* restore flags */ \ \ /* output parameters */ \ : [prev_sp] "=m" (prev->thread.sp), \ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h new file mode 100644 index 000000000000..90395063383c --- /dev/null +++ b/arch/x86/include/asm/text-patching.h @@ -0,0 +1,40 @@ +#ifndef _ASM_X86_TEXT_PATCHING_H +#define _ASM_X86_TEXT_PATCHING_H + +#include <linux/types.h> +#include <linux/stddef.h> +#include <asm/ptrace.h> + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); +#else +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL +#endif + +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + +/* + * Clear and restore the kernel write-protection flag on the local CPU. + * Allows the kernel to edit read-only pages. + * Side-effect: any interrupt handler running between save and restore will have + * the ability to write to read-only pages. + * + * Warning: + * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and + * no thread can be preempted in the instructions being modified (no iret to an + * invalid instruction possible) or if the instructions are changed from a + * consistent state to another consistent state atomically. + * On the local CPU you need to be protected again NMI or MCE handlers seeing an + * inconsistent instruction while you patch. + */ +extern void *text_poke(void *addr, const void *opcode, size_t len); +extern int poke_int3_handler(struct pt_regs *regs); +extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); + +#endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ffae84df8a93..30c133ac05cd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void) return true; } -static inline bool is_ia32_task(void) +static inline bool in_ia32_syscall(void) { #ifdef CONFIG_X86_32 return true; diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 1fde8d580a5b..4e5be94e079a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -181,7 +181,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (cpu_has_pge) + if (static_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 174c4212780a..7428697c5b8d 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -22,7 +22,7 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { #ifndef CONFIG_X86_TSC - if (!cpu_has_tsc) + if (!boot_cpu_has(X86_FEATURE_TSC)) return 0; #endif diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a969ae607be8..12f9653bde8d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -108,9 +108,17 @@ struct exception_table_entry { #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->handler = (b)->handler + (delta); \ + (b)->handler = (tmp).handler - (delta); \ + } while (0) + extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); -extern int early_fixup_exception(unsigned long *ip); +extern void early_fixup_exception(struct pt_regs *regs, int trapnr); /* * These are the main single-value transfer routines. They automatically diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 71605c7d5c5c..c852590254d5 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -51,15 +51,66 @@ enum { BIOS_STATUS_UNAVAIL = -EBUSY }; +/* Address map parameters */ +struct uv_gam_parameters { + u64 mmr_base; + u64 gru_base; + u8 mmr_shift; /* Convert PNode to MMR space offset */ + u8 gru_shift; /* Convert PNode to GRU space offset */ + u8 gpa_shift; /* Size of offset field in GRU phys addr */ + u8 unused1; +}; + +/* UV_TABLE_GAM_RANGE_ENTRY values */ +#define UV_GAM_RANGE_TYPE_UNUSED 0 /* End of table */ +#define UV_GAM_RANGE_TYPE_RAM 1 /* Normal RAM */ +#define UV_GAM_RANGE_TYPE_NVRAM 2 /* Non-volatile memory */ +#define UV_GAM_RANGE_TYPE_NV_WINDOW 3 /* NVMDIMM block window */ +#define UV_GAM_RANGE_TYPE_NV_MAILBOX 4 /* NVMDIMM mailbox */ +#define UV_GAM_RANGE_TYPE_HOLE 5 /* Unused address range */ +#define UV_GAM_RANGE_TYPE_MAX 6 + +/* The structure stores PA bits 56:26, for 64MB granularity */ +#define UV_GAM_RANGE_SHFT 26 /* 64MB */ + +struct uv_gam_range_entry { + char type; /* Entry type: GAM_RANGE_TYPE_UNUSED, etc. */ + char unused1; + u16 nasid; /* HNasid */ + u16 sockid; /* Socket ID, high bits of APIC ID */ + u16 pnode; /* Index to MMR and GRU spaces */ + u32 pxm; /* ACPI proximity domain number */ + u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */ +}; + +#define UV_SYSTAB_SIG "UVST" +#define UV_SYSTAB_VERSION_1 1 /* UV1/2/3 BIOS version */ +#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */ +#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */ +#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */ +#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2 + +#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */ +#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */ +#define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */ +#define UV_SYSTAB_TYPE_MAX 3 + /* * The UV system table describes specific firmware * capabilities available to the Linux kernel at runtime. */ struct uv_systab { - char signature[4]; /* must be "UVST" */ + char signature[4]; /* must be UV_SYSTAB_SIG */ u32 revision; /* distinguish different firmware revs */ u64 function; /* BIOS runtime callback function ptr */ + u32 size; /* systab size (starting with _VERSION_UV4) */ + struct { + u32 type:8; /* type of entry */ + u32 offset:24; /* byte offset from struct start to entry */ + } entry[1]; /* additional entries follow */ }; +extern struct uv_systab *uv_systab; +/* (... end of definitions from UV BIOS ...) */ enum { BIOS_FREQ_BASE_PLATFORM = 0, @@ -99,7 +150,11 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); +#ifdef CONFIG_EFI extern void uv_bios_init(void); +#else +void uv_bios_init(void) { } +#endif extern unsigned long sn_rtc_cycles_per_second; extern int uv_type; @@ -107,7 +162,7 @@ extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; extern long system_serial_number; -#define partition_coherence_id() (sn_coherency_id) +#define uv_partition_coherence_id() (sn_coherency_id) extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index fc808b83fccb..cc44d926c17e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -598,7 +598,7 @@ struct bau_control { int timeout_tries; int ipi_attempts; int conseccompletes; - short nobau; + bool nobau; short baudisabled; short cpu; short osnode; diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index ea7074784cc4..097b80c989c4 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -16,9 +16,11 @@ #include <linux/percpu.h> #include <linux/timer.h> #include <linux/io.h> +#include <linux/topology.h> #include <asm/types.h> #include <asm/percpu.h> #include <asm/uv/uv_mmrs.h> +#include <asm/uv/bios.h> #include <asm/irq_vectors.h> #include <asm/io_apic.h> @@ -103,7 +105,6 @@ * processor APICID register. */ - /* * Maximum number of bricks in all partitions and in all coherency domains. * This is the total number of bricks accessible in the numalink fabric. It @@ -127,6 +128,7 @@ */ #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) +/* System Controller Interface Reg info */ struct uv_scir_s { struct timer_list timer; unsigned long offset; @@ -137,71 +139,173 @@ struct uv_scir_s { unsigned char enabled; }; +/* GAM (globally addressed memory) range table */ +struct uv_gam_range_s { + u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */ + u16 nasid; /* node's global physical address */ + s8 base; /* entry index of node's base addr */ + u8 reserved; +}; + /* * The following defines attributes of the HUB chip. These attributes are - * frequently referenced and are kept in the per-cpu data areas of each cpu. - * They are kept together in a struct to minimize cache misses. + * frequently referenced and are kept in a common per hub struct. + * After setup, the struct is read only, so it should be readily + * available in the L3 cache on the cpu socket for the node. */ struct uv_hub_info_s { unsigned long global_mmr_base; + unsigned long global_mmr_shift; unsigned long gpa_mask; - unsigned int gnode_extra; + unsigned short *socket_to_node; + unsigned short *socket_to_pnode; + unsigned short *pnode_to_socket; + struct uv_gam_range_s *gr_table; + unsigned short min_socket; + unsigned short min_pnode; + unsigned char m_val; + unsigned char n_val; + unsigned char gr_table_len; unsigned char hub_revision; unsigned char apic_pnode_shift; + unsigned char gpa_shift; unsigned char m_shift; unsigned char n_lshift; + unsigned int gnode_extra; unsigned long gnode_upper; unsigned long lowmem_remap_top; unsigned long lowmem_remap_base; + unsigned long global_gru_base; + unsigned long global_gru_shift; unsigned short pnode; unsigned short pnode_mask; unsigned short coherency_domain_number; unsigned short numa_blade_id; - unsigned char blade_processor_id; - unsigned char m_val; - unsigned char n_val; + unsigned short nr_possible_cpus; + unsigned short nr_online_cpus; + short memory_nid; +}; + +/* CPU specific info with a pointer to the hub common info struct */ +struct uv_cpu_info_s { + void *p_uv_hub_info; + unsigned char blade_cpu_id; struct uv_scir_s scir; }; +DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info); + +#define uv_cpu_info this_cpu_ptr(&__uv_cpu_info) +#define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu)) + +#define uv_scir_info (&uv_cpu_info->scir) +#define uv_cpu_scir_info(cpu) (&uv_cpu_info_per(cpu)->scir) + +/* Node specific hub common info struct */ +extern void **__uv_hub_info_list; +static inline struct uv_hub_info_s *uv_hub_info_list(int node) +{ + return (struct uv_hub_info_s *)__uv_hub_info_list[node]; +} + +static inline struct uv_hub_info_s *_uv_hub_info(void) +{ + return (struct uv_hub_info_s *)uv_cpu_info->p_uv_hub_info; +} +#define uv_hub_info _uv_hub_info() -DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info this_cpu_ptr(&__uv_hub_info) -#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) +static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu) +{ + return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info; +} + +#define UV_HUB_INFO_VERSION 0x7150 +extern int uv_hub_info_version(void); +static inline int uv_hub_info_check(int version) +{ + if (uv_hub_info_version() == version) + return 0; + + pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n", + uv_hub_info_version(), version); + + BUG(); /* Catastrophic - cannot continue on unknown UV system */ +} +#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION) /* - * Hub revisions less than UV2_HUB_REVISION_BASE are UV1 hubs. All UV2 - * hubs have revision numbers greater than or equal to UV2_HUB_REVISION_BASE. + * HUB revision ranges for each UV HUB architecture. * This is a software convention - NOT the hardware revision numbers in * the hub chip. */ #define UV1_HUB_REVISION_BASE 1 #define UV2_HUB_REVISION_BASE 3 #define UV3_HUB_REVISION_BASE 5 +#define UV4_HUB_REVISION_BASE 7 +#ifdef UV1_HUB_IS_SUPPORTED static inline int is_uv1_hub(void) { return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE; } +#else +static inline int is_uv1_hub(void) +{ + return 0; +} +#endif +#ifdef UV2_HUB_IS_SUPPORTED static inline int is_uv2_hub(void) { return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) && (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE)); } +#else +static inline int is_uv2_hub(void) +{ + return 0; +} +#endif +#ifdef UV3_HUB_IS_SUPPORTED +static inline int is_uv3_hub(void) +{ + return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) && + (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)); +} +#else static inline int is_uv3_hub(void) { - return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE; + return 0; } +#endif -static inline int is_uv_hub(void) +#ifdef UV4_HUB_IS_SUPPORTED +static inline int is_uv4_hub(void) { - return uv_hub_info->hub_revision; + return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE; } +#else +static inline int is_uv4_hub(void) +{ + return 0; +} +#endif -/* code common to uv2 and uv3 only */ static inline int is_uvx_hub(void) { - return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; + if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) + return uv_hub_info->hub_revision; + + return 0; +} + +static inline int is_uv_hub(void) +{ +#ifdef UV1_HUB_IS_SUPPORTED + return uv_hub_info->hub_revision; +#endif + return is_uvx_hub(); } union uvh_apicid { @@ -243,24 +347,42 @@ union uvh_apicid { #define UV3_LOCAL_MMR_SIZE (32UL * 1024 * 1024) #define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) -#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \ - (is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \ - UV3_LOCAL_MMR_BASE)) -#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\ - (is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\ - UV3_GLOBAL_MMR32_BASE)) -#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \ - (is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \ - UV3_LOCAL_MMR_SIZE)) -#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\ - (is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\ - UV3_GLOBAL_MMR32_SIZE)) +#define UV4_LOCAL_MMR_BASE 0xfa000000UL +#define UV4_GLOBAL_MMR32_BASE 0xfc000000UL +#define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV4_GLOBAL_MMR32_SIZE (16UL * 1024 * 1024) + +#define UV_LOCAL_MMR_BASE ( \ + is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \ + is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \ + is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \ + /*is_uv4_hub*/ UV4_LOCAL_MMR_BASE) + +#define UV_GLOBAL_MMR32_BASE ( \ + is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE : \ + is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \ + is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \ + /*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE) + +#define UV_LOCAL_MMR_SIZE ( \ + is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \ + is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \ + is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \ + /*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE) + +#define UV_GLOBAL_MMR32_SIZE ( \ + is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE : \ + is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \ + is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \ + /*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE) + #define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) #define UV_GLOBAL_GRU_MMR_BASE 0x4000000 #define UV_GLOBAL_MMR32_PNODE_SHIFT 15 -#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 +#define _UV_GLOBAL_MMR64_PNODE_SHIFT 26 +#define UV_GLOBAL_MMR64_PNODE_SHIFT (uv_hub_info->global_mmr_shift) #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) @@ -307,18 +429,74 @@ union uvh_apicid { * between socket virtual and socket physical addresses. */ +/* global bits offset - number of local address bits in gpa for this UV arch */ +static inline unsigned int uv_gpa_shift(void) +{ + return uv_hub_info->gpa_shift; +} +#define _uv_gpa_shift + +/* Find node that has the address range that contains global address */ +static inline struct uv_gam_range_s *uv_gam_range(unsigned long pa) +{ + struct uv_gam_range_s *gr = uv_hub_info->gr_table; + unsigned long pal = (pa & uv_hub_info->gpa_mask) >> UV_GAM_RANGE_SHFT; + int i, num = uv_hub_info->gr_table_len; + + if (gr) { + for (i = 0; i < num; i++, gr++) { + if (pal < gr->limit) + return gr; + } + } + pr_crit("UV: GAM Range for 0x%lx not found at %p!\n", pa, gr); + BUG(); +} + +/* Return base address of node that contains global address */ +static inline unsigned long uv_gam_range_base(unsigned long pa) +{ + struct uv_gam_range_s *gr = uv_gam_range(pa); + int base = gr->base; + + if (base < 0) + return 0UL; + + return uv_hub_info->gr_table[base].limit; +} + +/* socket phys RAM --> UV global NASID (UV4+) */ +static inline unsigned long uv_soc_phys_ram_to_nasid(unsigned long paddr) +{ + return uv_gam_range(paddr)->nasid; +} +#define _uv_soc_phys_ram_to_nasid + +/* socket virtual --> UV global NASID (UV4+) */ +static inline unsigned long uv_gpa_nasid(void *v) +{ + return uv_soc_phys_ram_to_nasid(__pa(v)); +} + /* socket phys RAM --> UV global physical address */ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) { + unsigned int m_val = uv_hub_info->m_val; + if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; paddr |= uv_hub_info->gnode_upper; - paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | - ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); + if (m_val) + paddr = ((paddr << uv_hub_info->m_shift) + >> uv_hub_info->m_shift) | + ((paddr >> uv_hub_info->m_val) + << uv_hub_info->n_lshift); + else + paddr |= uv_soc_phys_ram_to_nasid(paddr) + << uv_hub_info->gpa_shift; return paddr; } - /* socket virtual --> UV global physical address */ static inline unsigned long uv_gpa(void *v) { @@ -338,54 +516,89 @@ static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) unsigned long paddr; unsigned long remap_base = uv_hub_info->lowmem_remap_base; unsigned long remap_top = uv_hub_info->lowmem_remap_top; + unsigned int m_val = uv_hub_info->m_val; + + if (m_val) + gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | + ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); - gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | - ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); paddr = gpa & uv_hub_info->gpa_mask; if (paddr >= remap_base && paddr < remap_base + remap_top) paddr -= remap_base; return paddr; } - -/* gpa -> pnode */ +/* gpa -> gnode */ static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) { - return gpa >> uv_hub_info->n_lshift; + unsigned int n_lshift = uv_hub_info->n_lshift; + + if (n_lshift) + return gpa >> n_lshift; + + return uv_gam_range(gpa)->nasid >> 1; } /* gpa -> pnode */ static inline int uv_gpa_to_pnode(unsigned long gpa) { - unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1; - - return uv_gpa_to_gnode(gpa) & n_mask; + return uv_gpa_to_gnode(gpa) & uv_hub_info->pnode_mask; } -/* gpa -> node offset*/ +/* gpa -> node offset */ static inline unsigned long uv_gpa_to_offset(unsigned long gpa) { - return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift; + unsigned int m_shift = uv_hub_info->m_shift; + + if (m_shift) + return (gpa << m_shift) >> m_shift; + + return (gpa & uv_hub_info->gpa_mask) - uv_gam_range_base(gpa); +} + +/* Convert socket to node */ +static inline int _uv_socket_to_node(int socket, unsigned short *s2nid) +{ + return s2nid ? s2nid[socket - uv_hub_info->min_socket] : socket; +} + +static inline int uv_socket_to_node(int socket) +{ + return _uv_socket_to_node(socket, uv_hub_info->socket_to_node); } /* pnode, offset --> socket virtual */ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { - return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); -} + unsigned int m_val = uv_hub_info->m_val; + unsigned long base; + unsigned short sockid, node, *p2s; + if (m_val) + return __va(((unsigned long)pnode << m_val) | offset); -/* - * Extract a PNODE from an APICID (full apicid, not processor subset) - */ + p2s = uv_hub_info->pnode_to_socket; + sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; + node = uv_socket_to_node(sockid); + + /* limit address of previous socket is our base, except node 0 is 0 */ + if (!node) + return __va((unsigned long)offset); + + base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit); + return __va(base << UV_GAM_RANGE_SHFT | offset); +} + +/* Extract/Convert a PNODE from an APICID (full apicid, not processor subset) */ static inline int uv_apicid_to_pnode(int apicid) { - return (apicid >> uv_hub_info->apic_pnode_shift); + int pnode = apicid >> uv_hub_info->apic_pnode_shift; + unsigned short *s2pn = uv_hub_info->socket_to_pnode; + + return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode; } -/* - * Convert an apicid to the socket number on the blade - */ +/* Convert an apicid to the socket number on the blade */ static inline int uv_apicid_to_socket(int apicid) { if (is_uv1_hub()) @@ -434,16 +647,6 @@ static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset return readq(uv_global_mmr64_address(pnode, offset)); } -/* - * Global MMR space addresses when referenced by the GRU. (GRU does - * NOT use socket addressing). - */ -static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) -{ - return UV_GLOBAL_GRU_MMR_BASE | offset | - ((unsigned long)pnode << uv_hub_info->m_val); -} - static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) { writeb(val, uv_global_mmr64_address(pnode, offset)); @@ -483,27 +686,23 @@ static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) writeb(val, uv_local_mmr_address(offset)); } -/* - * Structures and definitions for converting between cpu, node, pnode, and blade - * numbers. - */ -struct uv_blade_info { - unsigned short nr_possible_cpus; - unsigned short nr_online_cpus; - unsigned short pnode; - short memory_nid; - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ - unsigned long nmi_count; /* obsolete, see uv_hub_nmi */ -}; -extern struct uv_blade_info *uv_blade_info; -extern short *uv_node_to_blade; -extern short *uv_cpu_to_blade; -extern short uv_possible_blades; - /* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */ static inline int uv_blade_processor_id(void) { - return uv_hub_info->blade_processor_id; + return uv_cpu_info->blade_cpu_id; +} + +/* Blade-local cpu number of cpu N. Numbered 0 .. <# cpus on the blade> */ +static inline int uv_cpu_blade_processor_id(int cpu) +{ + return uv_cpu_info_per(cpu)->blade_cpu_id; +} +#define _uv_cpu_blade_processor_id 1 /* indicate function available */ + +/* Blade number to Node number (UV1..UV4 is 1:1) */ +static inline int uv_blade_to_node(int blade) +{ + return blade; } /* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ @@ -512,55 +711,60 @@ static inline int uv_numa_blade_id(void) return uv_hub_info->numa_blade_id; } -/* Convert a cpu number to the the UV blade number */ -static inline int uv_cpu_to_blade_id(int cpu) +/* + * Convert linux node number to the UV blade number. + * .. Currently for UV1 thru UV4 the node and the blade are identical. + * .. If this changes then you MUST check references to this function! + */ +static inline int uv_node_to_blade_id(int nid) { - return uv_cpu_to_blade[cpu]; + return nid; } -/* Convert linux node number to the UV blade number */ -static inline int uv_node_to_blade_id(int nid) +/* Convert a cpu number to the the UV blade number */ +static inline int uv_cpu_to_blade_id(int cpu) { - return uv_node_to_blade[nid]; + return uv_node_to_blade_id(cpu_to_node(cpu)); } /* Convert a blade id to the PNODE of the blade */ static inline int uv_blade_to_pnode(int bid) { - return uv_blade_info[bid].pnode; + return uv_hub_info_list(uv_blade_to_node(bid))->pnode; } /* Nid of memory node on blade. -1 if no blade-local memory */ static inline int uv_blade_to_memory_nid(int bid) { - return uv_blade_info[bid].memory_nid; + return uv_hub_info_list(uv_blade_to_node(bid))->memory_nid; } /* Determine the number of possible cpus on a blade */ static inline int uv_blade_nr_possible_cpus(int bid) { - return uv_blade_info[bid].nr_possible_cpus; + return uv_hub_info_list(uv_blade_to_node(bid))->nr_possible_cpus; } /* Determine the number of online cpus on a blade */ static inline int uv_blade_nr_online_cpus(int bid) { - return uv_blade_info[bid].nr_online_cpus; + return uv_hub_info_list(uv_blade_to_node(bid))->nr_online_cpus; } /* Convert a cpu id to the PNODE of the blade containing the cpu */ static inline int uv_cpu_to_pnode(int cpu) { - return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode; + return uv_cpu_hub_info(cpu)->pnode; } /* Convert a linux node number to the PNODE of the blade */ static inline int uv_node_to_pnode(int nid) { - return uv_blade_info[uv_node_to_blade_id(nid)].pnode; + return uv_hub_info_list(nid)->pnode; } /* Maximum possible number of blades */ +extern short uv_possible_blades; static inline int uv_num_possible_blades(void) { return uv_possible_blades; @@ -578,9 +782,7 @@ extern void uv_nmi_setup(void); /* Newer SMM NMI handler, not present in all systems */ #define UVH_NMI_MMRX UVH_EVENT_OCCURRED0 #define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS -#define UVH_NMI_MMRX_SHIFT (is_uv1_hub() ? \ - UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\ - UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT) +#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT #define UVH_NMI_MMRX_TYPE "EXTIO_INT0" /* Non-zero indicates newer SMM NMI handler present */ @@ -622,9 +824,9 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); /* Update SCIR state */ static inline void uv_set_scir_bits(unsigned char value) { - if (uv_hub_info->scir.state != value) { - uv_hub_info->scir.state = value; - uv_write_local_mmr8(uv_hub_info->scir.offset, value); + if (uv_scir_info->state != value) { + uv_scir_info->state = value; + uv_write_local_mmr8(uv_scir_info->offset, value); } } @@ -635,10 +837,10 @@ static inline unsigned long uv_scir_offset(int apicid) static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) { - if (uv_cpu_hub_info(cpu)->scir.state != value) { + if (uv_cpu_scir_info(cpu)->state != value) { uv_write_global_mmr8(uv_cpu_to_pnode(cpu), - uv_cpu_hub_info(cpu)->scir.offset, value); - uv_cpu_hub_info(cpu)->scir.state = value; + uv_cpu_scir_info(cpu)->offset, value); + uv_cpu_scir_info(cpu)->state = value; } } @@ -666,10 +868,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) /* * Get the minimum revision number of the hub chips within the partition. - * 1 - UV1 rev 1.0 initial silicon - * 2 - UV1 rev 2.0 production silicon - * 3 - UV2 rev 1.0 initial silicon - * 5 - UV3 rev 1.0 initial silicon + * (See UVx_HUB_REVISION_BASE above for specific values.) */ static inline int uv_get_min_hub_revision_id(void) { diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index ddd8db6b6e70..548d684a7960 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -18,10 +18,11 @@ * grouped by architecture types. * * UVH - definitions common to all UV hub types. - * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3). + * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4). * UV1H - definitions specific to UV type 1 hub. * UV2H - definitions specific to UV type 2 hub. * UV3H - definitions specific to UV type 3 hub. + * UV4H - definitions specific to UV type 4 hub. * * So in general, MMR addresses and structures are identical on all hubs types. * These MMRs are identified as: @@ -32,19 +33,25 @@ * } s; * }; * - * If the MMR exists on all hub types but have different addresses: + * If the MMR exists on all hub types but have different addresses, + * use a conditional operator to define the value at runtime. * #define UV1Hxxx a * #define UV2Hxxx b * #define UV3Hxxx c + * #define UV4Hxxx d * #define UVHxxx (is_uv1_hub() ? UV1Hxxx : * (is_uv2_hub() ? UV2Hxxx : - * UV3Hxxx)) + * (is_uv3_hub() ? UV3Hxxx : + * UV4Hxxx)) * - * If the MMR exists on all hub types > 1 but have different addresses: + * If the MMR exists on all hub types > 1 but have different addresses, the + * variation using "UVX" as the prefix exists. * #define UV2Hxxx b * #define UV3Hxxx c - * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx : - * UV3Hxxx)) + * #define UV4Hxxx d + * #define UVHxxx (is_uv2_hub() ? UV2Hxxx : + * (is_uv3_hub() ? UV3Hxxx : + * UV4Hxxx)) * * union uvh_xxx { * unsigned long v; @@ -56,6 +63,8 @@ * } s2; * struct uv3h_xxx_s { # Full UV3 definition (*) * } s3; + * struct uv4h_xxx_s { # Full UV4 definition (*) + * } s4; * }; * (* - if present and different than the common struct) * @@ -73,7 +82,7 @@ * } sn; * }; * - * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH) + * (GEN Flags: mflags_opt= undefs=function UV234=UVXH) */ #define UV_MMR_ENABLE (1UL << 63) @@ -83,20 +92,36 @@ #define UV2_HUB_PART_NUMBER_X 0x1111 #define UV3_HUB_PART_NUMBER 0x9578 #define UV3_HUB_PART_NUMBER_X 0x4321 +#define UV4_HUB_PART_NUMBER 0x99a1 /* Compat: Indicate which UV Hubs are supported. */ +#define UV1_HUB_IS_SUPPORTED 1 #define UV2_HUB_IS_SUPPORTED 1 #define UV3_HUB_IS_SUPPORTED 1 +#define UV4_HUB_IS_SUPPORTED 1 + +/* Error function to catch undefined references */ +extern unsigned long uv_undefined(char *str); /* ========================================================================= */ /* UVH_BAU_DATA_BROADCAST */ /* ========================================================================= */ #define UVH_BAU_DATA_BROADCAST 0x61688UL -#define UVH_BAU_DATA_BROADCAST_32 0x440 + +#define UV1H_BAU_DATA_BROADCAST_32 0x440 +#define UV2H_BAU_DATA_BROADCAST_32 0x440 +#define UV3H_BAU_DATA_BROADCAST_32 0x440 +#define UV4H_BAU_DATA_BROADCAST_32 0x360 +#define UVH_BAU_DATA_BROADCAST_32 ( \ + is_uv1_hub() ? UV1H_BAU_DATA_BROADCAST_32 : \ + is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 : \ + is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 : \ + /*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32) #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL + union uvh_bau_data_broadcast_u { unsigned long v; struct uvh_bau_data_broadcast_s { @@ -109,7 +134,16 @@ union uvh_bau_data_broadcast_u { /* UVH_BAU_DATA_CONFIG */ /* ========================================================================= */ #define UVH_BAU_DATA_CONFIG 0x61680UL -#define UVH_BAU_DATA_CONFIG_32 0x438 + +#define UV1H_BAU_DATA_CONFIG_32 0x438 +#define UV2H_BAU_DATA_CONFIG_32 0x438 +#define UV3H_BAU_DATA_CONFIG_32 0x438 +#define UV4H_BAU_DATA_CONFIG_32 0x358 +#define UVH_BAU_DATA_CONFIG_32 ( \ + is_uv1_hub() ? UV1H_BAU_DATA_CONFIG_32 : \ + is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 : \ + is_uv3_hub() ? UV3H_BAU_DATA_CONFIG_32 : \ + /*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32) #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 #define UVH_BAU_DATA_CONFIG_DM_SHFT 8 @@ -128,6 +162,7 @@ union uvh_bau_data_broadcast_u { #define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL #define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_bau_data_config_u { unsigned long v; struct uvh_bau_data_config_s { @@ -266,7 +301,6 @@ union uvh_bau_data_config_u { #define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL -#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1 #define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 #define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 @@ -275,55 +309,11 @@ union uvh_bau_data_config_u { #define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 #define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 -#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 #define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 -#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 -#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL #define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL #define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL #define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL @@ -332,54 +322,294 @@ union uvh_bau_data_config_u { #define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL #define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL #define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL -#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL #define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL -#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +#define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1 +#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10 +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17 +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18 +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19 +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20 +#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21 +#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22 +#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23 +#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24 +#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25 +#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26 +#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27 +#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28 +#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29 +#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30 +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31 +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32 +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33 +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34 +#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35 +#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36 +#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53 +#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54 +#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55 +#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56 +#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57 +#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58 +#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63 +#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL +#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL +#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL +#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL +#define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL + +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \ + is_uv1_hub() ? UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ + is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ + is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ + /*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT) union uvh_event_occurred0_u { unsigned long v; @@ -391,7 +621,7 @@ union uvh_event_occurred0_u { } s; struct uvxh_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ - unsigned long qp_hcerr:1; /* RW */ + unsigned long rsvd_1:1; unsigned long rh_hcerr:1; /* RW */ unsigned long lh0_hcerr:1; /* RW */ unsigned long lh1_hcerr:1; /* RW */ @@ -400,25 +630,51 @@ union uvh_event_occurred0_u { unsigned long ni0_hcerr:1; /* RW */ unsigned long ni1_hcerr:1; /* RW */ unsigned long lb_aoerr0:1; /* RW */ - unsigned long qp_aoerr0:1; /* RW */ + unsigned long rsvd_10:1; unsigned long rh_aoerr0:1; /* RW */ unsigned long lh0_aoerr0:1; /* RW */ unsigned long lh1_aoerr0:1; /* RW */ unsigned long gr0_aoerr0:1; /* RW */ unsigned long gr1_aoerr0:1; /* RW */ unsigned long xb_aoerr0:1; /* RW */ - unsigned long rt_aoerr0:1; /* RW */ + unsigned long rsvd_17_63:47; + } sx; + struct uv4h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rtq0_aoerr0:1; /* RW */ + unsigned long rtq1_aoerr0:1; /* RW */ + unsigned long rtq2_aoerr0:1; /* RW */ + unsigned long rtq3_aoerr0:1; /* RW */ unsigned long ni0_aoerr0:1; /* RW */ unsigned long ni1_aoerr0:1; /* RW */ unsigned long lb_aoerr1:1; /* RW */ - unsigned long qp_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ unsigned long rh_aoerr1:1; /* RW */ unsigned long lh0_aoerr1:1; /* RW */ unsigned long lh1_aoerr1:1; /* RW */ unsigned long gr0_aoerr1:1; /* RW */ unsigned long gr1_aoerr1:1; /* RW */ unsigned long xb_aoerr1:1; /* RW */ - unsigned long rt_aoerr1:1; /* RW */ + unsigned long rtq0_aoerr1:1; /* RW */ + unsigned long rtq1_aoerr1:1; /* RW */ + unsigned long rtq2_aoerr1:1; /* RW */ + unsigned long rtq3_aoerr1:1; /* RW */ unsigned long ni0_aoerr1:1; /* RW */ unsigned long ni1_aoerr1:1; /* RW */ unsigned long system_shutdown_int:1; /* RW */ @@ -448,9 +704,7 @@ union uvh_event_occurred0_u { unsigned long extio_int1:1; /* RW */ unsigned long extio_int2:1; /* RW */ unsigned long extio_int3:1; /* RW */ - unsigned long profile_int:1; /* RW */ - unsigned long rsvd_59_63:5; - } sx; + } s4; }; /* ========================================================================= */ @@ -464,11 +718,21 @@ union uvh_event_occurred0_u { /* UVH_EXTIO_INT0_BROADCAST */ /* ========================================================================= */ #define UVH_EXTIO_INT0_BROADCAST 0x61448UL -#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0 + +#define UV1H_EXTIO_INT0_BROADCAST_32 0x3f0 +#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0 +#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0 +#define UV4H_EXTIO_INT0_BROADCAST_32 0x310 +#define UVH_EXTIO_INT0_BROADCAST_32 ( \ + is_uv1_hub() ? UV1H_EXTIO_INT0_BROADCAST_32 : \ + is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 : \ + is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 : \ + /*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32) #define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL + union uvh_extio_int0_broadcast_u { unsigned long v; struct uvh_extio_int0_broadcast_s { @@ -499,6 +763,7 @@ union uvh_extio_int0_broadcast_u { #define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr0_tlb_int0_config_u { unsigned long v; struct uvh_gr0_tlb_int0_config_s { @@ -537,6 +802,7 @@ union uvh_gr0_tlb_int0_config_u { #define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr0_tlb_int1_config_u { unsigned long v; struct uvh_gr0_tlb_int1_config_s { @@ -559,19 +825,18 @@ union uvh_gr0_tlb_int1_config_u { #define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL #define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL #define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL -#define UVH_GR0_TLB_MMR_CONTROL \ - (is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \ - (is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \ - UV3H_GR0_TLB_MMR_CONTROL)) +#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL +#define UVH_GR0_TLB_MMR_CONTROL ( \ + is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \ + is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \ + is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL : \ + /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL) #define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 #define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 #define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 #define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 #define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL #define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL #define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL #define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL @@ -601,14 +866,11 @@ union uvh_gr0_tlb_int1_config_u { #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL #define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 #define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL #define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL #define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL #define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL @@ -651,12 +913,45 @@ union uvh_gr0_tlb_int1_config_u { #define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL #define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL +#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 13 +#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 +#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59 +#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL +#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL +#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL +#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL +#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL + +#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK ( \ + is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ + is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ + is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ + /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK) +#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK ( \ + is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ + is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ + is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ + /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK) +#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT ( \ + is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ + is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ + is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ + /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT) + union uvh_gr0_tlb_mmr_control_u { unsigned long v; struct uvh_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; + unsigned long rsvd_0_15:16; unsigned long auto_valid_en:1; /* RW */ unsigned long rsvd_17_19:3; unsigned long mmr_hash_index_en:1; /* RW */ @@ -690,9 +985,7 @@ union uvh_gr0_tlb_mmr_control_u { unsigned long rsvd_61_63:3; } s1; struct uvxh_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; + unsigned long rsvd_0_15:16; unsigned long auto_valid_en:1; /* RW */ unsigned long rsvd_17_19:3; unsigned long mmr_hash_index_en:1; /* RW */ @@ -703,8 +996,7 @@ union uvh_gr0_tlb_mmr_control_u { unsigned long rsvd_33_47:15; unsigned long rsvd_48:1; unsigned long rsvd_49_51:3; - unsigned long rsvd_52:1; - unsigned long rsvd_53_63:11; + unsigned long rsvd_52_63:12; } sx; struct uv2h_gr0_tlb_mmr_control_s { unsigned long index:12; /* RW */ @@ -741,6 +1033,24 @@ union uvh_gr0_tlb_mmr_control_u { unsigned long undef_52:1; /* Undefined */ unsigned long rsvd_53_63:11; } s3; + struct uv4h_gr0_tlb_mmr_control_s { + unsigned long index:13; /* RW */ + unsigned long mem_sel:2; /* RW */ + unsigned long rsvd_15:1; + unsigned long auto_valid_en:1; /* RW */ + unsigned long rsvd_17_19:3; + unsigned long mmr_hash_index_en:1; /* RW */ + unsigned long ecc_sel:1; /* RW */ + unsigned long rsvd_22_29:8; + unsigned long mmr_write:1; /* WP */ + unsigned long mmr_read:1; /* WP */ + unsigned long mmr_op_done:1; /* RW */ + unsigned long rsvd_33_47:15; + unsigned long undef_48:1; /* Undefined */ + unsigned long rsvd_49_51:3; + unsigned long rsvd_52_58:7; + unsigned long page_size:5; /* RW */ + } s4; }; /* ========================================================================= */ @@ -749,19 +1059,14 @@ union uvh_gr0_tlb_mmr_control_u { #define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL #define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL #define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL -#define UVH_GR0_TLB_MMR_READ_DATA_HI \ - (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \ - (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \ - UV3H_GR0_TLB_MMR_READ_DATA_HI)) +#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL +#define UVH_GR0_TLB_MMR_READ_DATA_HI ( \ + is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \ + is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \ + is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI : \ + /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI) #define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL #define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 #define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 @@ -773,13 +1078,6 @@ union uvh_gr0_tlb_mmr_control_u { #define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL #define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL #define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 #define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 @@ -803,15 +1101,24 @@ union uvh_gr0_tlb_mmr_control_u { #define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL #define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT 34 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 49 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL + + union uvh_gr0_tlb_mmr_read_data_hi_u { unsigned long v; - struct uvh_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s; struct uv1h_gr0_tlb_mmr_read_data_hi_s { unsigned long pfn:41; /* RO */ unsigned long gaa:2; /* RO */ @@ -819,13 +1126,6 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { unsigned long larger:1; /* RO */ unsigned long rsvd_45_63:19; } s1; - struct uvxh_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } sx; struct uv2h_gr0_tlb_mmr_read_data_hi_s { unsigned long pfn:41; /* RO */ unsigned long gaa:2; /* RO */ @@ -842,6 +1142,16 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { unsigned long undef_46_54:9; /* Undefined */ unsigned long way_ecc:9; /* RO */ } s3; + struct uv4h_gr0_tlb_mmr_read_data_hi_s { + unsigned long pfn:34; /* RO */ + unsigned long pnid:15; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long aa_ext:1; /* RO */ + unsigned long undef_54:1; /* Undefined */ + unsigned long way_ecc:9; /* RO */ + } s4; }; /* ========================================================================= */ @@ -850,10 +1160,12 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { #define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL #define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL #define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL -#define UVH_GR0_TLB_MMR_READ_DATA_LO \ - (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \ - (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \ - UV3H_GR0_TLB_MMR_READ_DATA_LO)) +#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL +#define UVH_GR0_TLB_MMR_READ_DATA_LO ( \ + is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \ + is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \ + is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_LO : \ + /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO) #define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 #define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 @@ -890,6 +1202,14 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { #define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL #define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + + union uvh_gr0_tlb_mmr_read_data_lo_u { unsigned long v; struct uvh_gr0_tlb_mmr_read_data_lo_s { @@ -917,12 +1237,25 @@ union uvh_gr0_tlb_mmr_read_data_lo_u { unsigned long asid:24; /* RO */ unsigned long valid:1; /* RO */ } s3; + struct uv4h_gr0_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s4; }; /* ========================================================================= */ /* UVH_GR1_TLB_INT0_CONFIG */ /* ========================================================================= */ -#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL +#define UVH_GR1_TLB_INT0_CONFIG ( \ + is_uv1_hub() ? UV1H_GR1_TLB_INT0_CONFIG : \ + is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG : \ + is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG : \ + /*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG) #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 #define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 @@ -941,6 +1274,7 @@ union uvh_gr0_tlb_mmr_read_data_lo_u { #define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr1_tlb_int0_config_u { unsigned long v; struct uvh_gr1_tlb_int0_config_s { @@ -960,7 +1294,15 @@ union uvh_gr1_tlb_int0_config_u { /* ========================================================================= */ /* UVH_GR1_TLB_INT1_CONFIG */ /* ========================================================================= */ -#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL +#define UVH_GR1_TLB_INT1_CONFIG ( \ + is_uv1_hub() ? UV1H_GR1_TLB_INT1_CONFIG : \ + is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG : \ + is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG : \ + /*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG) #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 #define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 @@ -979,6 +1321,7 @@ union uvh_gr1_tlb_int0_config_u { #define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr1_tlb_int1_config_u { unsigned long v; struct uvh_gr1_tlb_int1_config_s { @@ -1001,19 +1344,18 @@ union uvh_gr1_tlb_int1_config_u { #define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL #define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL #define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL -#define UVH_GR1_TLB_MMR_CONTROL \ - (is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \ - (is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \ - UV3H_GR1_TLB_MMR_CONTROL)) +#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL +#define UVH_GR1_TLB_MMR_CONTROL ( \ + is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \ + is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \ + is_uv3_hub() ? UV3H_GR1_TLB_MMR_CONTROL : \ + /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL) #define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 #define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 #define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 #define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 #define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL #define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL #define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL #define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL @@ -1043,14 +1385,11 @@ union uvh_gr1_tlb_int1_config_u { #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL #define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 #define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL #define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL #define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL #define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL @@ -1093,12 +1432,30 @@ union uvh_gr1_tlb_int1_config_u { #define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL #define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL +#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 13 +#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 +#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59 +#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL +#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL +#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL +#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL +#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL + + union uvh_gr1_tlb_mmr_control_u { unsigned long v; struct uvh_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; + unsigned long rsvd_0_15:16; unsigned long auto_valid_en:1; /* RW */ unsigned long rsvd_17_19:3; unsigned long mmr_hash_index_en:1; /* RW */ @@ -1132,9 +1489,7 @@ union uvh_gr1_tlb_mmr_control_u { unsigned long rsvd_61_63:3; } s1; struct uvxh_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; + unsigned long rsvd_0_15:16; unsigned long auto_valid_en:1; /* RW */ unsigned long rsvd_17_19:3; unsigned long mmr_hash_index_en:1; /* RW */ @@ -1145,8 +1500,7 @@ union uvh_gr1_tlb_mmr_control_u { unsigned long rsvd_33_47:15; unsigned long rsvd_48:1; unsigned long rsvd_49_51:3; - unsigned long rsvd_52:1; - unsigned long rsvd_53_63:11; + unsigned long rsvd_52_63:12; } sx; struct uv2h_gr1_tlb_mmr_control_s { unsigned long index:12; /* RW */ @@ -1183,6 +1537,24 @@ union uvh_gr1_tlb_mmr_control_u { unsigned long undef_52:1; /* Undefined */ unsigned long rsvd_53_63:11; } s3; + struct uv4h_gr1_tlb_mmr_control_s { + unsigned long index:13; /* RW */ + unsigned long mem_sel:2; /* RW */ + unsigned long rsvd_15:1; + unsigned long auto_valid_en:1; /* RW */ + unsigned long rsvd_17_19:3; + unsigned long mmr_hash_index_en:1; /* RW */ + unsigned long ecc_sel:1; /* RW */ + unsigned long rsvd_22_29:8; + unsigned long mmr_write:1; /* WP */ + unsigned long mmr_read:1; /* WP */ + unsigned long mmr_op_done:1; /* RW */ + unsigned long rsvd_33_47:15; + unsigned long undef_48:1; /* Undefined */ + unsigned long rsvd_49_51:3; + unsigned long rsvd_52_58:7; + unsigned long page_size:5; /* RW */ + } s4; }; /* ========================================================================= */ @@ -1191,19 +1563,14 @@ union uvh_gr1_tlb_mmr_control_u { #define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL #define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL #define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL -#define UVH_GR1_TLB_MMR_READ_DATA_HI \ - (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \ - (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \ - UV3H_GR1_TLB_MMR_READ_DATA_HI)) +#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL +#define UVH_GR1_TLB_MMR_READ_DATA_HI ( \ + is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \ + is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \ + is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI : \ + /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI) #define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL #define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 #define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 @@ -1215,13 +1582,6 @@ union uvh_gr1_tlb_mmr_control_u { #define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL #define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL #define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 #define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 @@ -1245,15 +1605,24 @@ union uvh_gr1_tlb_mmr_control_u { #define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL #define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT 34 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 49 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL + + union uvh_gr1_tlb_mmr_read_data_hi_u { unsigned long v; - struct uvh_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s; struct uv1h_gr1_tlb_mmr_read_data_hi_s { unsigned long pfn:41; /* RO */ unsigned long gaa:2; /* RO */ @@ -1261,13 +1630,6 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { unsigned long larger:1; /* RO */ unsigned long rsvd_45_63:19; } s1; - struct uvxh_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } sx; struct uv2h_gr1_tlb_mmr_read_data_hi_s { unsigned long pfn:41; /* RO */ unsigned long gaa:2; /* RO */ @@ -1284,6 +1646,16 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { unsigned long undef_46_54:9; /* Undefined */ unsigned long way_ecc:9; /* RO */ } s3; + struct uv4h_gr1_tlb_mmr_read_data_hi_s { + unsigned long pfn:34; /* RO */ + unsigned long pnid:15; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long aa_ext:1; /* RO */ + unsigned long undef_54:1; /* Undefined */ + unsigned long way_ecc:9; /* RO */ + } s4; }; /* ========================================================================= */ @@ -1292,10 +1664,12 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { #define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL #define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL #define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL -#define UVH_GR1_TLB_MMR_READ_DATA_LO \ - (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \ - (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \ - UV3H_GR1_TLB_MMR_READ_DATA_LO)) +#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL +#define UVH_GR1_TLB_MMR_READ_DATA_LO ( \ + is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \ + is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \ + is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO : \ + /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO) #define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 #define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 @@ -1332,6 +1706,14 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { #define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL #define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + + union uvh_gr1_tlb_mmr_read_data_lo_u { unsigned long v; struct uvh_gr1_tlb_mmr_read_data_lo_s { @@ -1359,6 +1741,11 @@ union uvh_gr1_tlb_mmr_read_data_lo_u { unsigned long asid:24; /* RO */ unsigned long valid:1; /* RO */ } s3; + struct uv4h_gr1_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s4; }; /* ========================================================================= */ @@ -1369,6 +1756,7 @@ union uvh_gr1_tlb_mmr_read_data_lo_u { #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + union uvh_int_cmpb_u { unsigned long v; struct uvh_int_cmpb_s { @@ -1382,12 +1770,14 @@ union uvh_int_cmpb_u { /* ========================================================================= */ #define UVH_INT_CMPC 0x22100UL + #define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 #define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL #define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 #define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL + union uvh_int_cmpc_u { unsigned long v; struct uvh_int_cmpc_s { @@ -1401,12 +1791,14 @@ union uvh_int_cmpc_u { /* ========================================================================= */ #define UVH_INT_CMPD 0x22180UL + #define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 #define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL #define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 #define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL + union uvh_int_cmpd_u { unsigned long v; struct uvh_int_cmpd_s { @@ -1419,7 +1811,16 @@ union uvh_int_cmpd_u { /* UVH_IPI_INT */ /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UVH_IPI_INT_32 0x348 + +#define UV1H_IPI_INT_32 0x348 +#define UV2H_IPI_INT_32 0x348 +#define UV3H_IPI_INT_32 0x348 +#define UV4H_IPI_INT_32 0x268 +#define UVH_IPI_INT_32 ( \ + is_uv1_hub() ? UV1H_IPI_INT_32 : \ + is_uv2_hub() ? UV2H_IPI_INT_32 : \ + is_uv3_hub() ? UV3H_IPI_INT_32 : \ + /*is_uv4_hub*/ UV4H_IPI_INT_32) #define UVH_IPI_INT_VECTOR_SHFT 0 #define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 @@ -1432,6 +1833,7 @@ union uvh_int_cmpd_u { #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL + union uvh_ipi_int_u { unsigned long v; struct uvh_ipi_int_s { @@ -1448,103 +1850,269 @@ union uvh_ipi_int_u { /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST") +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST ( \ + is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ + is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ + is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ + /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST) #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + + +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + union uvh_lb_bau_intd_payload_queue_first_u { unsigned long v; - struct uvh_lb_bau_intd_payload_queue_first_s { + struct uv1h_lb_bau_intd_payload_queue_first_s { unsigned long rsvd_0_3:4; unsigned long address:39; /* RW */ unsigned long rsvd_43_48:6; unsigned long node_id:14; /* RW */ unsigned long rsvd_63:1; - } s; + } s1; + struct uv2h_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s2; + struct uv3h_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST") +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST ( \ + is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ + is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ + is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ + /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST) #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + + +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + union uvh_lb_bau_intd_payload_queue_last_u { unsigned long v; - struct uvh_lb_bau_intd_payload_queue_last_s { + struct uv1h_lb_bau_intd_payload_queue_last_s { unsigned long rsvd_0_3:4; unsigned long address:39; /* RW */ unsigned long rsvd_43_63:21; - } s; + } s1; + struct uv2h_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s2; + struct uv3h_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL") +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL ( \ + is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ + is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ + is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ + /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL) #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + + +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + union uvh_lb_bau_intd_payload_queue_tail_u { unsigned long v; - struct uvh_lb_bau_intd_payload_queue_tail_s { + struct uv1h_lb_bau_intd_payload_queue_tail_s { unsigned long rsvd_0_3:4; unsigned long address:39; /* RW */ unsigned long rsvd_43_63:21; - } s; + } s1; + struct uv2h_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s2; + struct uv3h_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE") +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE ( \ + is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ + is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ + is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ + /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE) #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + + +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + union uvh_lb_bau_intd_software_acknowledge_u { unsigned long v; - struct uvh_lb_bau_intd_software_acknowledge_s { + struct uv1h_lb_bau_intd_software_acknowledge_s { unsigned long pending_0:1; /* RW, W1C */ unsigned long pending_1:1; /* RW, W1C */ unsigned long pending_2:1; /* RW, W1C */ @@ -1562,27 +2130,84 @@ union uvh_lb_bau_intd_software_acknowledge_u { unsigned long timeout_6:1; /* RW, W1C */ unsigned long timeout_7:1; /* RW, W1C */ unsigned long rsvd_16_63:48; - } s; + } s1; + struct uv2h_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0:1; /* RW */ + unsigned long pending_1:1; /* RW */ + unsigned long pending_2:1; /* RW */ + unsigned long pending_3:1; /* RW */ + unsigned long pending_4:1; /* RW */ + unsigned long pending_5:1; /* RW */ + unsigned long pending_6:1; /* RW */ + unsigned long pending_7:1; /* RW */ + unsigned long timeout_0:1; /* RW */ + unsigned long timeout_1:1; /* RW */ + unsigned long timeout_2:1; /* RW */ + unsigned long timeout_3:1; /* RW */ + unsigned long timeout_4:1; /* RW */ + unsigned long timeout_5:1; /* RW */ + unsigned long timeout_6:1; /* RW */ + unsigned long timeout_7:1; /* RW */ + unsigned long rsvd_16_63:48; + } s2; + struct uv3h_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0:1; /* RW */ + unsigned long pending_1:1; /* RW */ + unsigned long pending_2:1; /* RW */ + unsigned long pending_3:1; /* RW */ + unsigned long pending_4:1; /* RW */ + unsigned long pending_5:1; /* RW */ + unsigned long pending_6:1; /* RW */ + unsigned long pending_7:1; /* RW */ + unsigned long timeout_0:1; /* RW */ + unsigned long timeout_1:1; /* RW */ + unsigned long timeout_2:1; /* RW */ + unsigned long timeout_3:1; /* RW */ + unsigned long timeout_4:1; /* RW */ + unsigned long timeout_5:1; /* RW */ + unsigned long timeout_6:1; /* RW */ + unsigned long timeout_7:1; /* RW */ + unsigned long rsvd_16_63:48; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS") +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS ( \ + is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ + is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ + is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ + /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS) #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 /* ========================================================================= */ /* UVH_LB_BAU_MISC_CONTROL */ /* ========================================================================= */ -#define UVH_LB_BAU_MISC_CONTROL 0x320170UL #define UV1H_LB_BAU_MISC_CONTROL 0x320170UL #define UV2H_LB_BAU_MISC_CONTROL 0x320170UL #define UV3H_LB_BAU_MISC_CONTROL 0x320170UL -#define UVH_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV1H_LB_BAU_MISC_CONTROL_32 0x320170UL -#define UV2H_LB_BAU_MISC_CONTROL_32 0x320170UL -#define UV3H_LB_BAU_MISC_CONTROL_32 0x320170UL +#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL +#define UVH_LB_BAU_MISC_CONTROL ( \ + is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL : \ + is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL : \ + is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL : \ + /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL) + +#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10 +#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10 +#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10 +#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18 +#define UVH_LB_BAU_MISC_CONTROL_32 ( \ + is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_32 : \ + is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 : \ + is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_32 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32) #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 @@ -1590,8 +2215,6 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 @@ -1606,8 +2229,6 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL @@ -1656,8 +2277,6 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 #define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 #define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 #define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 @@ -1679,8 +2298,6 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL #define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL #define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL #define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL @@ -1797,6 +2414,88 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL #define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL +#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT 15 +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 +#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 +#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36 +#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT 37 +#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38 +#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46 +#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK 0x00000000000f8000UL +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL +#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL +#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL +#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL +#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK 0x0000002000000000UL +#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL +#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL +#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK \ + uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK") +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK ( \ + is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ + is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ + is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ + /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK) +#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT \ + uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT") +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT ( \ + is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ + is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ + is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ + /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT) +#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK \ + uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK") +#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK ( \ + is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ + is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ + is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ + /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK) +#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT \ + uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT") +#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT ( \ + is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ + is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ + is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ + /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT) + union uvh_lb_bau_misc_control_u { unsigned long v; struct uvh_lb_bau_misc_control_s { @@ -1806,8 +2505,7 @@ union uvh_lb_bau_misc_control_u { unsigned long force_lock_nop:1; /* RW */ unsigned long qpi_agent_presence_vector:3; /* RW */ unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ + unsigned long rsvd_15_19:5; unsigned long enable_dual_mapping_mode:1; /* RW */ unsigned long vga_io_port_decode_enable:1; /* RW */ unsigned long vga_io_port_16_bit_decode:1; /* RW */ @@ -1844,8 +2542,7 @@ union uvh_lb_bau_misc_control_u { unsigned long force_lock_nop:1; /* RW */ unsigned long qpi_agent_presence_vector:3; /* RW */ unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ + unsigned long rsvd_15_19:5; unsigned long enable_dual_mapping_mode:1; /* RW */ unsigned long vga_io_port_decode_enable:1; /* RW */ unsigned long vga_io_port_16_bit_decode:1; /* RW */ @@ -1918,13 +2615,59 @@ union uvh_lb_bau_misc_control_u { unsigned long rsvd_46_47:2; unsigned long fun:16; /* RW */ } s3; + struct uv4h_lb_bau_misc_control_s { + unsigned long rejection_delay:8; /* RW */ + unsigned long apic_mode:1; /* RW */ + unsigned long force_broadcast:1; /* RW */ + unsigned long force_lock_nop:1; /* RW */ + unsigned long qpi_agent_presence_vector:3; /* RW */ + unsigned long descriptor_fetch_mode:1; /* RW */ + unsigned long rsvd_15_19:5; + unsigned long enable_dual_mapping_mode:1; /* RW */ + unsigned long vga_io_port_decode_enable:1; /* RW */ + unsigned long vga_io_port_16_bit_decode:1; /* RW */ + unsigned long suppress_dest_registration:1; /* RW */ + unsigned long programmed_initial_priority:3; /* RW */ + unsigned long use_incoming_priority:1; /* RW */ + unsigned long enable_programmed_initial_priority:1;/* RW */ + unsigned long enable_automatic_apic_mode_selection:1;/* RW */ + unsigned long apic_mode_status:1; /* RO */ + unsigned long suppress_interrupts_to_self:1; /* RW */ + unsigned long enable_lock_based_system_flush:1;/* RW */ + unsigned long enable_extended_sb_status:1; /* RW */ + unsigned long suppress_int_prio_udt_to_self:1;/* RW */ + unsigned long use_legacy_descriptor_formats:1;/* RW */ + unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */ + unsigned long rsvd_37:1; + unsigned long thread_kill_timebase:8; /* RW */ + unsigned long address_interleave_select:1; /* RW */ + unsigned long rsvd_47:1; + unsigned long fun:16; /* RW */ + } s4; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL) + +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32) #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 @@ -1933,6 +2676,7 @@ union uvh_lb_bau_misc_control_u { #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL + union uvh_lb_bau_sb_activation_control_u { unsigned long v; struct uvh_lb_bau_sb_activation_control_s { @@ -1946,12 +2690,30 @@ union uvh_lb_bau_sb_activation_control_u { /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0) + +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32) #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL + union uvh_lb_bau_sb_activation_status_0_u { unsigned long v; struct uvh_lb_bau_sb_activation_status_0_s { @@ -1962,12 +2724,30 @@ union uvh_lb_bau_sb_activation_status_0_u { /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1) + +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32) #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL + union uvh_lb_bau_sb_activation_status_1_u { unsigned long v; struct uvh_lb_bau_sb_activation_status_1_s { @@ -1978,23 +2758,55 @@ union uvh_lb_bau_sb_activation_status_1_u { /* ========================================================================= */ /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE) + +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32) #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL + + +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL + +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL + +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL + + union uvh_lb_bau_sb_descriptor_base_u { unsigned long v; struct uvh_lb_bau_sb_descriptor_base_s { unsigned long rsvd_0_11:12; - unsigned long page_address:31; /* RW */ - unsigned long rsvd_43_48:6; + unsigned long rsvd_12_48:37; unsigned long node_id:14; /* RW */ unsigned long rsvd_63:1; } s; + struct uv4h_lb_bau_sb_descriptor_base_s { + unsigned long rsvd_0_11:12; + unsigned long page_address:34; /* RW */ + unsigned long rsvd_46_48:3; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s4; }; /* ========================================================================= */ @@ -2004,6 +2816,7 @@ union uvh_lb_bau_sb_descriptor_base_u { #define UV1H_NODE_ID 0x0UL #define UV2H_NODE_ID 0x0UL #define UV3H_NODE_ID 0x0UL +#define UV4H_NODE_ID 0x0UL #define UVH_NODE_ID_FORCE1_SHFT 0 #define UVH_NODE_ID_MANUFACTURER_SHFT 1 @@ -2080,6 +2893,26 @@ union uvh_lb_bau_sb_descriptor_base_u { #define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL #define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL +#define UV4H_NODE_ID_FORCE1_SHFT 0 +#define UV4H_NODE_ID_MANUFACTURER_SHFT 1 +#define UV4H_NODE_ID_PART_NUMBER_SHFT 12 +#define UV4H_NODE_ID_REVISION_SHFT 28 +#define UV4H_NODE_ID_NODE_ID_SHFT 32 +#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV4H_NODE_ID_RESERVED_2_SHFT 49 +#define UV4H_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UV4H_NODE_ID_NI_PORT_SHFT 57 +#define UV4H_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UV4H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UV4H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UV4H_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UV4H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL +#define UV4H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UV4H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL + + union uvh_node_id_u { unsigned long v; struct uvh_node_id_s { @@ -2137,17 +2970,40 @@ union uvh_node_id_u { unsigned long ni_port:5; /* RO */ unsigned long rsvd_62_63:2; } s3; + struct uv4h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s4; }; /* ========================================================================= */ /* UVH_NODE_PRESENT_TABLE */ /* ========================================================================= */ #define UVH_NODE_PRESENT_TABLE 0x1400UL -#define UVH_NODE_PRESENT_TABLE_DEPTH 16 + +#define UV1H_NODE_PRESENT_TABLE_DEPTH 16 +#define UV2H_NODE_PRESENT_TABLE_DEPTH 16 +#define UV3H_NODE_PRESENT_TABLE_DEPTH 16 +#define UV4H_NODE_PRESENT_TABLE_DEPTH 4 +#define UVH_NODE_PRESENT_TABLE_DEPTH ( \ + is_uv1_hub() ? UV1H_NODE_PRESENT_TABLE_DEPTH : \ + is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH : \ + is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH : \ + /*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH) #define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 #define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + union uvh_node_present_table_u { unsigned long v; struct uvh_node_present_table_s { @@ -2158,7 +3014,15 @@ union uvh_node_present_table_u { /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR) #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 @@ -2167,6 +3031,7 @@ union uvh_node_present_table_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { @@ -2182,7 +3047,15 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR) #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 @@ -2191,6 +3064,7 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { @@ -2206,7 +3080,15 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR) #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 @@ -2215,6 +3097,7 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { @@ -2230,11 +3113,20 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR) #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { @@ -2247,11 +3139,20 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR) #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { @@ -2264,11 +3165,20 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR) #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { @@ -2281,14 +3191,17 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL #define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL #define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL #define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL +#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL +#define UVH_RH_GAM_CONFIG_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_CONFIG_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR) -#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL #define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 @@ -2298,9 +3211,7 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL -#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 #define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL #define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 @@ -2313,10 +3224,14 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { #define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL #define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL +#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL + + union uvh_rh_gam_config_mmr_u { unsigned long v; struct uvh_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ + unsigned long rsvd_0_5:6; unsigned long n_skt:4; /* RW */ unsigned long rsvd_10_63:54; } s; @@ -2328,7 +3243,7 @@ union uvh_rh_gam_config_mmr_u { unsigned long rsvd_13_63:51; } s1; struct uvxh_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ + unsigned long rsvd_0_5:6; unsigned long n_skt:4; /* RW */ unsigned long rsvd_10_63:54; } sx; @@ -2342,20 +3257,28 @@ union uvh_rh_gam_config_mmr_u { unsigned long n_skt:4; /* RW */ unsigned long rsvd_10_63:54; } s3; + struct uv4h_rh_gam_config_mmr_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s4; }; /* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL #define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR) -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL @@ -2368,10 +3291,8 @@ union uvh_rh_gam_config_mmr_u { #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL @@ -2391,12 +3312,28 @@ union uvh_rh_gam_config_mmr_u { #define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL #define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK ( \ + is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ + is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ + is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK) +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT ( \ + is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ + is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ + is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ + /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT) + union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27:28; - unsigned long base:18; /* RW */ - unsigned long rsvd_46_51:6; + unsigned long rsvd_0_51:52; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ @@ -2412,8 +3349,7 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long enable:1; /* RW */ } s1; struct uvxh_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27:28; - unsigned long base:18; /* RW */ + unsigned long rsvd_0_45:46; unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; @@ -2436,6 +3372,15 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long mode:1; /* RW */ unsigned long enable:1; /* RW */ } s3; + struct uv4h_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:20; /* RW */ + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -2443,6 +3388,14 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { /* ========================================================================= */ #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR") +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR") +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR) + #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 @@ -2453,6 +3406,7 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 @@ -2462,6 +3416,7 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_mmioh_overlay_config_mmr_u { unsigned long v; struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { @@ -2485,10 +3440,15 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL #define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL +#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR) #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 @@ -2517,6 +3477,12 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { #define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL #define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + + union uvh_rh_gam_mmr_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_mmr_overlay_config_mmr_s { @@ -2550,16 +3516,31 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u { unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } s3; + struct uv4h_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ /* UVH_RTC */ /* ========================================================================= */ -#define UVH_RTC 0x340000UL +#define UV1H_RTC 0x340000UL +#define UV2H_RTC 0x340000UL +#define UV3H_RTC 0x340000UL +#define UV4H_RTC 0xe0000UL +#define UVH_RTC ( \ + is_uv1_hub() ? UV1H_RTC : \ + is_uv2_hub() ? UV2H_RTC : \ + is_uv3_hub() ? UV3H_RTC : \ + /*is_uv4_hub*/ UV4H_RTC) #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + union uvh_rtc_u { unsigned long v; struct uvh_rtc_s { @@ -2590,6 +3571,7 @@ union uvh_rtc_u { #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_rtc1_int_config_u { unsigned long v; struct uvh_rtc1_int_config_s { @@ -2609,12 +3591,30 @@ union uvh_rtc1_int_config_u { /* ========================================================================= */ /* UVH_SCRATCH5 */ /* ========================================================================= */ -#define UVH_SCRATCH5 0x2d0200UL -#define UVH_SCRATCH5_32 0x778 +#define UV1H_SCRATCH5 0x2d0200UL +#define UV2H_SCRATCH5 0x2d0200UL +#define UV3H_SCRATCH5 0x2d0200UL +#define UV4H_SCRATCH5 0xb0200UL +#define UVH_SCRATCH5 ( \ + is_uv1_hub() ? UV1H_SCRATCH5 : \ + is_uv2_hub() ? UV2H_SCRATCH5 : \ + is_uv3_hub() ? UV3H_SCRATCH5 : \ + /*is_uv4_hub*/ UV4H_SCRATCH5) + +#define UV1H_SCRATCH5_32 0x778 +#define UV2H_SCRATCH5_32 0x778 +#define UV3H_SCRATCH5_32 0x778 +#define UV4H_SCRATCH5_32 0x798 +#define UVH_SCRATCH5_32 ( \ + is_uv1_hub() ? UV1H_SCRATCH5_32 : \ + is_uv2_hub() ? UV2H_SCRATCH5_32 : \ + is_uv3_hub() ? UV3H_SCRATCH5_32 : \ + /*is_uv4_hub*/ UV4H_SCRATCH5_32) #define UVH_SCRATCH5_SCRATCH5_SHFT 0 #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + union uvh_scratch5_u { unsigned long v; struct uvh_scratch5_s { @@ -2625,14 +3625,39 @@ union uvh_scratch5_u { /* ========================================================================= */ /* UVH_SCRATCH5_ALIAS */ /* ========================================================================= */ -#define UVH_SCRATCH5_ALIAS 0x2d0208UL -#define UVH_SCRATCH5_ALIAS_32 0x780 +#define UV1H_SCRATCH5_ALIAS 0x2d0208UL +#define UV2H_SCRATCH5_ALIAS 0x2d0208UL +#define UV3H_SCRATCH5_ALIAS 0x2d0208UL +#define UV4H_SCRATCH5_ALIAS 0xb0208UL +#define UVH_SCRATCH5_ALIAS ( \ + is_uv1_hub() ? UV1H_SCRATCH5_ALIAS : \ + is_uv2_hub() ? UV2H_SCRATCH5_ALIAS : \ + is_uv3_hub() ? UV3H_SCRATCH5_ALIAS : \ + /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS) + +#define UV1H_SCRATCH5_ALIAS_32 0x780 +#define UV2H_SCRATCH5_ALIAS_32 0x780 +#define UV3H_SCRATCH5_ALIAS_32 0x780 +#define UV4H_SCRATCH5_ALIAS_32 0x7a0 +#define UVH_SCRATCH5_ALIAS_32 ( \ + is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_32 : \ + is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 : \ + is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 : \ + /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32) /* ========================================================================= */ /* UVH_SCRATCH5_ALIAS_2 */ /* ========================================================================= */ -#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV1H_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL +#define UVH_SCRATCH5_ALIAS_2 ( \ + is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_2 : \ + is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 : \ + is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 : \ + /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2) #define UVH_SCRATCH5_ALIAS_2_32 0x788 @@ -2640,76 +3665,255 @@ union uvh_scratch5_u { /* UVXH_EVENT_OCCURRED2 */ /* ========================================================================= */ #define UVXH_EVENT_OCCURRED2 0x70100UL -#define UVXH_EVENT_OCCURRED2_32 0xb68 - -#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT 17 -#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UVXH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UVXH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UVXH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UVXH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UVXH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UVXH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UVXH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UVXH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UVXH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UVXH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UVXH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UVXH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UVXH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UVXH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UVXH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UVXH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UVXH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UVXH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UVXH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UVXH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UVXH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UVXH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UVXH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UVXH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UVXH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UVXH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UVXH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UVXH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UVXH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UVXH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL - -union uvxh_event_occurred2_u { + +#define UV2H_EVENT_OCCURRED2_32 0xb68 +#define UV3H_EVENT_OCCURRED2_32 0xb68 +#define UV4H_EVENT_OCCURRED2_32 0x608 +#define UVH_EVENT_OCCURRED2_32 ( \ + is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 : \ + is_uv3_hub() ? UV3H_EVENT_OCCURRED2_32 : \ + /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32) + + +#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15 +#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16 +#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17 +#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18 +#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19 +#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20 +#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21 +#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22 +#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23 +#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24 +#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25 +#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26 +#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27 +#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28 +#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29 +#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30 +#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31 +#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32 +#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33 +#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34 +#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35 +#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36 +#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37 +#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38 +#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39 +#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40 +#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41 +#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42 +#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43 +#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44 +#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45 +#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46 +#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47 +#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48 +#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL +#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL +#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL + +#define UVXH_EVENT_OCCURRED2_RTC_1_MASK ( \ + is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK : \ + is_uv3_hub() ? UV3H_EVENT_OCCURRED2_RTC_1_MASK : \ + /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK) + +union uvh_event_occurred2_u { unsigned long v; - struct uvxh_event_occurred2_s { + struct uv2h_event_occurred2_s { unsigned long rtc_0:1; /* RW */ unsigned long rtc_1:1; /* RW */ unsigned long rtc_2:1; /* RW */ @@ -2743,25 +3947,129 @@ union uvxh_event_occurred2_u { unsigned long rtc_30:1; /* RW */ unsigned long rtc_31:1; /* RW */ unsigned long rsvd_32_63:32; - } sx; + } s2; + struct uv3h_event_occurred2_s { + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_32_63:32; + } s3; + struct uv4h_event_occurred2_s { + unsigned long message_accelerator_int0:1; /* RW */ + unsigned long message_accelerator_int1:1; /* RW */ + unsigned long message_accelerator_int2:1; /* RW */ + unsigned long message_accelerator_int3:1; /* RW */ + unsigned long message_accelerator_int4:1; /* RW */ + unsigned long message_accelerator_int5:1; /* RW */ + unsigned long message_accelerator_int6:1; /* RW */ + unsigned long message_accelerator_int7:1; /* RW */ + unsigned long message_accelerator_int8:1; /* RW */ + unsigned long message_accelerator_int9:1; /* RW */ + unsigned long message_accelerator_int10:1; /* RW */ + unsigned long message_accelerator_int11:1; /* RW */ + unsigned long message_accelerator_int12:1; /* RW */ + unsigned long message_accelerator_int13:1; /* RW */ + unsigned long message_accelerator_int14:1; /* RW */ + unsigned long message_accelerator_int15:1; /* RW */ + unsigned long rtc_interval_int:1; /* RW */ + unsigned long bau_dashboard_int:1; /* RW */ + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_50_63:14; + } s4; }; /* ========================================================================= */ /* UVXH_EVENT_OCCURRED2_ALIAS */ /* ========================================================================= */ #define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL -#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70 + +#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 +#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70 +#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610 +#define UVH_EVENT_OCCURRED2_ALIAS_32 ( \ + is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 : \ + is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 : \ + /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32) /* ========================================================================= */ /* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */ /* ========================================================================= */ -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL #define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 ( \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2) + +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 ( \ + is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32) #define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 #define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL @@ -2772,6 +4080,10 @@ union uvxh_event_occurred2_u { #define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 #define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 +#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL + + union uvxh_lb_bau_sb_activation_status_2_u { unsigned long v; struct uvxh_lb_bau_sb_activation_status_2_s { @@ -2783,6 +4095,9 @@ union uvxh_lb_bau_sb_activation_status_2_u { struct uv3h_lb_bau_sb_activation_status_2_s { unsigned long aux_error:64; /* RW */ } s3; + struct uv4h_lb_bau_sb_activation_status_2_s { + unsigned long aux_error:64; /* RW */ + } s4; }; /* ========================================================================= */ @@ -2823,26 +4138,6 @@ union uv3h_gr0_gam_gr_config_u { }; /* ========================================================================= */ -/* UV3H_GR1_GAM_GR_CONFIG */ -/* ========================================================================= */ -#define UV3H_GR1_GAM_GR_CONFIG 0x1000028UL - -#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_SHFT 0 -#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_SHFT 10 -#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL -#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL - -union uv3h_gr1_gam_gr_config_u { - unsigned long v; - struct uv3h_gr1_gam_gr_config_s { - unsigned long m_skt:6; /* RW */ - unsigned long undef_6_9:4; /* Undefined */ - unsigned long subspace:1; /* RW */ - unsigned long reserved:53; - } s3; -}; - -/* ========================================================================= */ /* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ /* ========================================================================= */ #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL @@ -2924,5 +4219,67 @@ union uv3h_rh_gam_mmioh_redirect_config1_mmr_u { } s3; }; +/* ========================================================================= */ +/* UV4H_LB_PROC_INTD_QUEUE_FIRST */ +/* ========================================================================= */ +#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL + +#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6 +#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL + +union uv4h_lb_proc_intd_queue_first_u { + unsigned long v; + struct uv4h_lb_proc_intd_queue_first_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long first_payload_address:40; /* RW */ + } s4; +}; + +/* ========================================================================= */ +/* UV4H_LB_PROC_INTD_QUEUE_LAST */ +/* ========================================================================= */ +#define UV4H_LB_PROC_INTD_QUEUE_LAST 0xa4108UL + +#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5 +#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL + +union uv4h_lb_proc_intd_queue_last_u { + unsigned long v; + struct uv4h_lb_proc_intd_queue_last_s { + unsigned long undef_0_4:5; /* Undefined */ + unsigned long last_payload_address:41; /* RW */ + } s4; +}; + +/* ========================================================================= */ +/* UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR */ +/* ========================================================================= */ +#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR 0xa4118UL + +#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0 +#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL + +union uv4h_lb_proc_intd_soft_ack_clear_u { + unsigned long v; + struct uv4h_lb_proc_intd_soft_ack_clear_s { + unsigned long soft_ack_pending_flags:8; /* WP */ + } s4; +}; + +/* ========================================================================= */ +/* UV4H_LB_PROC_INTD_SOFT_ACK_PENDING */ +/* ========================================================================= */ +#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING 0xa4110UL + +#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0 +#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL + +union uv4h_lb_proc_intd_soft_ack_pending_u { + unsigned long v; + struct uv4h_lb_proc_intd_soft_ack_pending_s { + unsigned long soft_ack_flags:8; /* RW */ + } s4; +}; + #endif /* _ASM_X86_UV_UV_MMRS_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1ae89a2721d6..4dcdf74dfed8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,44 @@ struct x86_cpuinit_ops { struct timespec; /** + * struct x86_legacy_devices - legacy x86 devices + * + * @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform + * is known to never have a PNPBIOS. + * + * These are devices known to require LPC or ISA bus. The definition of legacy + * devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag + * ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on + * the LPC or ISA bus. User visible devices are devices that have end-user + * accessible connectors (for example, LPT parallel port). Legacy devices on + * the LPC bus consist for example of serial and parallel ports, PS/2 keyboard + * / mouse, and the floppy disk controller. A system that lacks all known + * legacy devices can assume all devices can be detected exclusively via + * standard device enumeration mechanisms including the ACPI namespace. + * + * A system which has does not have ACPI_FADT_LEGACY_DEVICES enabled must not + * have any of the legacy devices enumerated below present. + */ +struct x86_legacy_devices { + int pnpbios; +}; + +/** + * struct x86_legacy_features - legacy x86 features + * + * @rtc: this device has a CMOS real-time clock present + * @ebda_search: it's safe to search for the EBDA signature in the hardware's + * low RAM + * @devices: legacy x86 devices, refer to struct x86_legacy_devices + * documentation for further details. + */ +struct x86_legacy_features { + int rtc; + int ebda_search; + struct x86_legacy_devices devices; +}; + +/** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC * @get_wallclock: get time from HW clock like RTC etc. @@ -152,6 +190,14 @@ struct timespec; * @save_sched_clock_state: save state for sched_clock() on suspend * @restore_sched_clock_state: restore state for sched_clock() on resume * @apic_post_init: adjust apic if neeeded + * @legacy: legacy features + * @set_legacy_features: override legacy features. Use of this callback + * is highly discouraged. You should only need + * this if your hardware platform requires further + * custom fine tuning far beyong what may be + * possible in x86_early_init_platform_quirks() by + * only using the current x86_hardware_subarch + * semantics. */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); @@ -165,6 +211,8 @@ struct x86_platform_ops { void (*save_sched_clock_state)(void); void (*restore_sched_clock_state)(void); void (*apic_post_init)(void); + struct x86_legacy_features legacy; + void (*set_legacy_features)(void); }; struct pci_dev; @@ -186,6 +234,8 @@ extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; extern struct x86_msi_ops x86_msi; extern struct x86_io_apic_ops x86_io_apic_ops; + +extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index c54beb44c4c1..635eac543922 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -550,7 +550,7 @@ static struct xor_block_template xor_block_pIII_sse = { #define XOR_TRY_TEMPLATES \ do { \ AVX_XOR_SPEED; \ - if (cpu_has_xmm) { \ + if (boot_cpu_has(X86_FEATURE_XMM)) { \ xor_speed(&xor_block_pIII_sse); \ xor_speed(&xor_block_sse_pf64); \ } else if (boot_cpu_has(X86_FEATURE_MMX)) { \ diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 7c0a517ec751..22a7b1870a31 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = { #define AVX_XOR_SPEED \ do { \ - if (cpu_has_avx && cpu_has_osxsave) \ + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ xor_speed(&xor_block_avx); \ } while (0) #define AVX_SELECT(FASTEST) \ - (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST) + (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) #else diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 329254373479..c18ce67495fa 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -157,7 +157,46 @@ struct boot_params { __u8 _pad9[276]; /* 0xeec */ } __attribute__((packed)); -enum { +/** + * enum x86_hardware_subarch - x86 hardware subarchitecture + * + * The x86 hardware_subarch and hardware_subarch_data were added as of the x86 + * boot protocol 2.07 to help distinguish and support custom x86 boot + * sequences. This enum represents accepted values for the x86 + * hardware_subarch. Custom x86 boot sequences (not X86_SUBARCH_PC) do not + * have or simply *cannot* make use of natural stubs like BIOS or EFI, the + * hardware_subarch can be used on the Linux entry path to revector to a + * subarchitecture stub when needed. This subarchitecture stub can be used to + * set up Linux boot parameters or for special care to account for nonstandard + * handling of page tables. + * + * These enums should only ever be used by x86 code, and the code that uses + * it should be well contained and compartamentalized. + * + * KVM and Xen HVM do not have a subarch as these are expected to follow + * standard x86 boot entries. If there is a genuine need for "hypervisor" type + * that should be considered separately in the future. Future guest types + * should seriously consider working with standard x86 boot stubs such as + * the BIOS or EFI boot stubs. + * + * WARNING: this enum is only used for legacy hacks, for platform features that + * are not easily enumerated or discoverable. You should not ever use + * this for new features. + * + * @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard + * PC mechanisms (PCI, ACPI) and doesn't need a special boot flow. + * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest + * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path, + * which start at asm startup_xen() entry point and later jump to the C + * xen_start_kernel() entry point. Both domU and dom0 type of guests are + * currently supportd through this PV boot path. + * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform + * systems which do not have the PCI legacy interfaces. + * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC for + * for settop boxes and media devices, the use of a subarch for CE4100 + * is more of a hack... + */ +enum x86_hardware_subarch { X86_SUBARCH_PC = 0, X86_SUBARCH_LGUEST, X86_SUBARCH_XEN, diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 616ebd22ef9a..9abf8551c7e4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -2,7 +2,11 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds +extra-y := head_$(BITS).o +extra-y += head$(BITS).o +extra-y += ebda.o +extra-y += platform-quirks.o +extra-y += vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8c2f1ef6ca23..f115a58f7c84 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -136,7 +136,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) { struct acpi_table_madt *madt = NULL; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -EINVAL; madt = (struct acpi_table_madt *)table; @@ -913,6 +913,15 @@ late_initcall(hpet_insert_resource); static int __init acpi_parse_fadt(struct acpi_table_header *table) { + if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) { + pr_debug("ACPI: no legacy devices present\n"); + x86_platform.legacy.devices.pnpbios = 0; + } + + if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { + pr_debug("ACPI: not registering RTC platform device\n"); + x86_platform.legacy.rtc = 0; + } #ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ @@ -951,7 +960,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) { int count; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; /* @@ -979,7 +988,7 @@ static int __init acpi_parse_madt_lapic_entries(void) int ret; struct acpi_subtable_proc madt_proc[2]; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; /* @@ -1125,7 +1134,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) if (acpi_disabled || acpi_noirq) return -ENODEV; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; /* diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 25f909362b7a..5cb272a7a5a3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,7 @@ #include <linux/stop_machine.h> #include <linux/slab.h> #include <linux/kdebug.h> +#include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d356987a04e9..60078a67d7e3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) long tapic = apic_read(APIC_TMCCT); unsigned long pm = acpi_pm_read_early(); - if (cpu_has_tsc) + if (boot_cpu_has(X86_FEATURE_TSC)) tsc = rdtsc(); switch (lapic_cal_loops++) { @@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) *delta = (long)res; /* Correct the tsc counter value */ - if (cpu_has_tsc) { + if (boot_cpu_has(X86_FEATURE_TSC)) { res = (((u64)(*deltatsc)) * pm_100ms); do_div(res, deltapm); apic_printk(APIC_VERBOSE, "TSC delta adjusted to " @@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", lapic_timer_frequency); - if (cpu_has_tsc) { + if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), @@ -1085,7 +1085,7 @@ void lapic_shutdown(void) { unsigned long flags; - if (!cpu_has_apic && !apic_from_smp_config()) + if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) return; local_irq_save(flags); @@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void) * Don't do the setup now if we have a SMP BIOS as the * through-I/O-APIC virtual wire mode might be active. */ - if (smp_found_config || !cpu_has_apic) + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) return; /* @@ -1227,7 +1227,7 @@ void setup_local_APIC(void) unsigned long long tsc = 0, ntsc; long long max_loops = cpu_khz ? cpu_khz : 1000000; - if (cpu_has_tsc) + if (boot_cpu_has(X86_FEATURE_TSC)) tsc = rdtsc(); if (disable_apic) { @@ -1311,7 +1311,7 @@ void setup_local_APIC(void) break; } if (queued) { - if (cpu_has_tsc && cpu_khz) { + if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else @@ -1445,7 +1445,7 @@ static void __x2apic_disable(void) { u64 msr; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1561,7 +1561,7 @@ void __init check_x2apic(void) pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); x2apic_mode = 1; x2apic_state = X2APIC_ON; - } else if (!cpu_has_x2apic) { + } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) { x2apic_state = X2APIC_DISABLED; } } @@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void) */ static int __init detect_init_APIC(void) { - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { pr_info("No local APIC present\n"); return -1; } @@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void) goto no_apic; case X86_VENDOR_INTEL: if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) + (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) break; goto no_apic; default: goto no_apic; } - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { /* * Over-ride BIOS and try to enable the local APIC only if * "lapic" specified. @@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void) return -1; } #ifdef CONFIG_X86_64 - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); return -1; } #else - if (!smp_found_config && !cpu_has_apic) + if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC)) return -1; /* * Complain if the BIOS pretends there is one. */ - if (!cpu_has_apic && + if (!boot_cpu_has(X86_FEATURE_APIC) && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { pr_err("BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); @@ -2426,7 +2426,7 @@ static void apic_pm_activate(void) static int __init init_lapic_sysfs(void) { /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - if (cpu_has_apic) + if (boot_cpu_has(X86_FEATURE_APIC)) register_syscore_ops(&lapic_syscore_ops); return 0; diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 331a7a07c48f..13d19ed58514 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, static u32 noop_apic_read(u32 reg) { - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); return 0; } static void noop_apic_write(u32 reg, u32 v) { - WARN_ON_ONCE(cpu_has_apic && !disable_apic); + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } struct apic apic_noop = { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index fdb0fbfb1197..84e33ff5a6d5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1454,7 +1454,7 @@ void native_disable_io_apic(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } - if (cpu_has_apic || apic_from_smp_config()) + if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config()) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 28bde88b0085..2a0f225afebd 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -230,7 +230,7 @@ int safe_smp_processor_id(void) { int apicid, cpuid; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return 0; apicid = hard_smp_processor_id(); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ef495511f019..a5e400afc563 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -944,7 +944,7 @@ static int __init print_ICs(void) print_PIC(); /* don't print out if apic is not there */ - if (!cpu_has_apic && !apic_from_smp_config()) + if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) return 0; print_local_APICs(show_lapic); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d7ce96a7daca..29003154fafd 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -48,12 +48,35 @@ static u64 gru_start_paddr, gru_end_paddr; static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; static u64 gru_dist_lmask, gru_dist_umask; static union uvh_apicid uvh_apicid; + +/* info derived from CPUID */ +static struct { + unsigned int apicid_shift; + unsigned int apicid_mask; + unsigned int socketid_shift; /* aka pnode_shift for UV1/2/3 */ + unsigned int pnode_mask; + unsigned int gpa_shift; +} uv_cpuid; + int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); unsigned int uv_apicid_hibits; EXPORT_SYMBOL_GPL(uv_apicid_hibits); static struct apic apic_x2apic_uv_x; +static struct uv_hub_info_s uv_hub_info_node0; + +/* Set this to use hardware error handler instead of kernel panic */ +static int disable_uv_undefined_panic = 1; +unsigned long uv_undefined(char *str) +{ + if (likely(!disable_uv_undefined_panic)) + panic("UV: error: undefined MMR: %s\n", str); + else + pr_crit("UV: error: undefined MMR: %s\n", str); + return ~0ul; /* cause a machine fault */ +} +EXPORT_SYMBOL(uv_undefined); static unsigned long __init uv_early_read_mmr(unsigned long addr) { @@ -108,21 +131,71 @@ static int __init early_get_pnodeid(void) case UV3_HUB_PART_NUMBER_X: uv_min_hub_revision_id += UV3_HUB_REVISION_BASE; break; + case UV4_HUB_PART_NUMBER: + uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1; + break; } uv_hub_info->hub_revision = uv_min_hub_revision_id; - pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); + uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1; + pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask; + uv_cpuid.gpa_shift = 46; /* default unless changed */ + + pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n", + node_id.s.revision, node_id.s.part_number, node_id.s.node_id, + m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode); return pnode; } -static void __init early_get_apic_pnode_shift(void) +/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ +#define SMT_LEVEL 0 /* leaf 0xb SMT level */ +#define INVALID_TYPE 0 /* leaf 0xb sub-leaf types */ +#define SMT_TYPE 1 +#define CORE_TYPE 2 +#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) +#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) + +static void set_x2apic_bits(void) +{ + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int sid_shift; + + cpuid(0, &eax, &ebx, &ecx, &edx); + if (eax < 0xb) { + pr_info("UV: CPU does not have CPUID.11\n"); + return; + } + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) { + pr_info("UV: CPUID.11 not implemented\n"); + return; + } + sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); + sub_index = 1; + do { + cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { + sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); + break; + } + sub_index++; + } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + uv_cpuid.apicid_shift = 0; + uv_cpuid.apicid_mask = (~(-1 << sid_shift)); + uv_cpuid.socketid_shift = sid_shift; +} + +static void __init early_get_apic_socketid_shift(void) { - uvh_apicid.v = uv_early_read_mmr(UVH_APICID); - if (!uvh_apicid.v) - /* - * Old bios, use default value - */ - uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; + if (is_uv2_hub() || is_uv3_hub()) + uvh_apicid.v = uv_early_read_mmr(UVH_APICID); + + set_x2apic_bits(); + + pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", + uv_cpuid.apicid_shift, uv_cpuid.apicid_mask); + pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", + uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); } /* @@ -150,13 +223,18 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (strncmp(oem_id, "SGI", 3) != 0) return 0; + /* Setup early hub type field in uv_hub_info for Node 0 */ + uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0; + /* * Determine UV arch type. * SGI: UV100/1000 * SGI2: UV2000/3000 * SGI3: UV300 (truncated to 4 chars because of different varieties) + * SGI4: UV400 (truncated to 4 chars because of different varieties) */ uv_hub_info->hub_revision = + !strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE : !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE : !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0; @@ -165,7 +243,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) goto badbios; pnodeid = early_get_pnodeid(); - early_get_apic_pnode_shift(); + early_get_apic_socketid_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; x86_platform.nmi_init = uv_nmi_init; @@ -211,17 +289,11 @@ int is_uv_system(void) } EXPORT_SYMBOL_GPL(is_uv_system); -DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); - -struct uv_blade_info *uv_blade_info; -EXPORT_SYMBOL_GPL(uv_blade_info); - -short *uv_node_to_blade; -EXPORT_SYMBOL_GPL(uv_node_to_blade); +void **__uv_hub_info_list; +EXPORT_SYMBOL_GPL(__uv_hub_info_list); -short *uv_cpu_to_blade; -EXPORT_SYMBOL_GPL(uv_cpu_to_blade); +DEFINE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info); +EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_info); short uv_possible_blades; EXPORT_SYMBOL_GPL(uv_possible_blades); @@ -229,6 +301,115 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); +/* the following values are used for the per node hub info struct */ +static __initdata unsigned short *_node_to_pnode; +static __initdata unsigned short _min_socket, _max_socket; +static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; +static __initdata struct uv_gam_range_entry *uv_gre_table; +static __initdata struct uv_gam_parameters *uv_gp_table; +static __initdata unsigned short *_socket_to_node; +static __initdata unsigned short *_socket_to_pnode; +static __initdata unsigned short *_pnode_to_socket; +static __initdata struct uv_gam_range_s *_gr_table; +#define SOCK_EMPTY ((unsigned short)~0) + +extern int uv_hub_info_version(void) +{ + return UV_HUB_INFO_VERSION; +} +EXPORT_SYMBOL(uv_hub_info_version); + +/* Build GAM range lookup table */ +static __init void build_uv_gr_table(void) +{ + struct uv_gam_range_entry *gre = uv_gre_table; + struct uv_gam_range_s *grt; + unsigned long last_limit = 0, ram_limit = 0; + int bytes, i, sid, lsid = -1; + + if (!gre) + return; + + bytes = _gr_table_len * sizeof(struct uv_gam_range_s); + grt = kzalloc(bytes, GFP_KERNEL); + BUG_ON(!grt); + _gr_table = grt; + + for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { + if (gre->type == UV_GAM_RANGE_TYPE_HOLE) { + if (!ram_limit) { /* mark hole between ram/non-ram */ + ram_limit = last_limit; + last_limit = gre->limit; + lsid++; + continue; + } + last_limit = gre->limit; + pr_info("UV: extra hole in GAM RE table @%d\n", + (int)(gre - uv_gre_table)); + continue; + } + if (_max_socket < gre->sockid) { + pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n", + gre->sockid, _max_socket, + (int)(gre - uv_gre_table)); + continue; + } + sid = gre->sockid - _min_socket; + if (lsid < sid) { /* new range */ + grt = &_gr_table[sid]; + grt->base = lsid; + grt->nasid = gre->nasid; + grt->limit = last_limit = gre->limit; + lsid = sid; + continue; + } + if (lsid == sid && !ram_limit) { /* update range */ + if (grt->limit == last_limit) { /* .. if contiguous */ + grt->limit = last_limit = gre->limit; + continue; + } + } + if (!ram_limit) { /* non-contiguous ram range */ + grt++; + grt->base = sid - 1; + grt->nasid = gre->nasid; + grt->limit = last_limit = gre->limit; + continue; + } + grt++; /* non-contiguous/non-ram */ + grt->base = grt - _gr_table; /* base is this entry */ + grt->nasid = gre->nasid; + grt->limit = last_limit = gre->limit; + lsid++; + } + + /* shorten table if possible */ + grt++; + i = grt - _gr_table; + if (i < _gr_table_len) { + void *ret; + + bytes = i * sizeof(struct uv_gam_range_s); + ret = krealloc(_gr_table, bytes, GFP_KERNEL); + if (ret) { + _gr_table = ret; + _gr_table_len = i; + } + } + + /* display resultant gam range table */ + for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) { + int gb = grt->base; + unsigned long start = gb < 0 ? 0 : + (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT; + unsigned long end = + (unsigned long)grt->limit << UV_GAM_RANGE_SHFT; + + pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n", + i, grt->nasid, start, end, gb); + } +} + static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { unsigned long val; @@ -355,7 +536,6 @@ static unsigned long set_apic_id(unsigned int id) static unsigned int uv_read_apic_id(void) { - return x2apic_get_apic_id(apic_read(APIC_ID)); } @@ -430,58 +610,38 @@ static void set_x2apic_extra_bits(int pnode) __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); } -/* - * Called on boot cpu. - */ -static __init int boot_pnode_to_blade(int pnode) -{ - int blade; - - for (blade = 0; blade < uv_num_possible_blades(); blade++) - if (pnode == uv_blade_info[blade].pnode) - return blade; - BUG(); -} - -struct redir_addr { - unsigned long redirect; - unsigned long alias; -}; - +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH 3 #define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT -static __initdata struct redir_addr redir_addrs[] = { - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR}, -}; - -static unsigned char get_n_lshift(int m_val) -{ - union uv3h_gr0_gam_gr_config_u m_gr_config; - - if (is_uv1_hub()) - return m_val; - - if (is_uv2_hub()) - return m_val == 40 ? 40 : 39; - - m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG); - return m_gr_config.s3.m_skt; -} - static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) { union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias; union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; + unsigned long m_redirect; + unsigned long m_overlay; int i; - for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { - alias.v = uv_read_local_mmr(redir_addrs[i].alias); + for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) { + switch (i) { + case 0: + m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR; + m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR; + break; + case 1: + m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR; + m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR; + break; + case 2: + m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR; + m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR; + break; + } + alias.v = uv_read_local_mmr(m_overlay); if (alias.s.enable && alias.s.base == 0) { *size = (1UL << alias.s.m_alias); - redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); - *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; + redirect.v = uv_read_local_mmr(m_redirect); + *base = (unsigned long)redirect.s.dest_base + << DEST_SHIFT; return; } } @@ -544,6 +704,8 @@ static __init void map_gru_high(int max_pnode) { union uvh_rh_gam_gru_overlay_config_mmr_u gru; int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; + unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK; + unsigned long base; gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); if (!gru.s.enable) { @@ -555,8 +717,9 @@ static __init void map_gru_high(int max_pnode) map_gru_distributed(gru.v); return; } - map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); - gru_start_paddr = ((u64)gru.s.base << shift); + base = (gru.v & mask) >> shift; + map_high("GRU", base, shift, shift, max_pnode, map_wb); + gru_start_paddr = ((u64)base << shift); gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); } @@ -595,6 +758,7 @@ static __initdata struct mmioh_config mmiohs[] = { }, }; +/* UV3 & UV4 have identical MMIOH overlay configs */ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) { union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay; @@ -674,7 +838,7 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode) unsigned long mmr, base; int shift, enable, m_io, n_io; - if (is_uv3_hub()) { + if (is_uv3_hub() || is_uv4_hub()) { /* Map both MMIOH Regions */ map_mmioh_high_uv3(0, min_pnode, max_pnode); map_mmioh_high_uv3(1, min_pnode, max_pnode); @@ -739,8 +903,8 @@ static __init void uv_rtc_init(void) */ static void uv_heartbeat(unsigned long ignored) { - struct timer_list *timer = &uv_hub_info->scir.timer; - unsigned char bits = uv_hub_info->scir.state; + struct timer_list *timer = &uv_scir_info->timer; + unsigned char bits = uv_scir_info->state; /* flip heartbeat bit */ bits ^= SCIR_CPU_HEARTBEAT; @@ -760,14 +924,14 @@ static void uv_heartbeat(unsigned long ignored) static void uv_heartbeat_enable(int cpu) { - while (!uv_cpu_hub_info(cpu)->scir.enabled) { - struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; + while (!uv_cpu_scir_info(cpu)->enabled) { + struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); setup_timer(timer, uv_heartbeat, cpu); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); - uv_cpu_hub_info(cpu)->scir.enabled = 1; + uv_cpu_scir_info(cpu)->enabled = 1; /* also ensure that boot cpu is enabled */ cpu = 0; @@ -777,9 +941,9 @@ static void uv_heartbeat_enable(int cpu) #ifdef CONFIG_HOTPLUG_CPU static void uv_heartbeat_disable(int cpu) { - if (uv_cpu_hub_info(cpu)->scir.enabled) { - uv_cpu_hub_info(cpu)->scir.enabled = 0; - del_timer(&uv_cpu_hub_info(cpu)->scir.timer); + if (uv_cpu_scir_info(cpu)->enabled) { + uv_cpu_scir_info(cpu)->enabled = 0; + del_timer(&uv_cpu_scir_info(cpu)->timer); } uv_set_cpu_scir_bits(cpu, 0xff); } @@ -862,155 +1026,475 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode, void uv_cpu_init(void) { /* CPU 0 initialization will be done via uv_system_init. */ - if (!uv_blade_info) + if (smp_processor_id() == 0) return; - uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; + uv_hub_info->nr_online_cpus++; if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } -void __init uv_system_init(void) +struct mn { + unsigned char m_val; + unsigned char n_val; + unsigned char m_shift; + unsigned char n_lshift; +}; + +static void get_mn(struct mn *mnp) { - union uvh_rh_gam_config_mmr_u m_n_config; - union uvh_node_id_u node_id; - unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; - int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; - int gnode_extra, min_pnode = 999999, max_pnode = -1; - unsigned long mmr_base, present, paddr; - unsigned short pnode_mask; - unsigned char n_lshift; - char *hub = (is_uv1_hub() ? "UV100/1000" : - (is_uv2_hub() ? "UV2000/3000" : - (is_uv3_hub() ? "UV300" : NULL))); + union uvh_rh_gam_config_mmr_u m_n_config; + union uv3h_gr0_gam_gr_config_u m_gr_config; - if (!hub) { - pr_err("UV: Unknown/unsupported UV hub\n"); - return; + m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR); + mnp->n_val = m_n_config.s.n_skt; + if (is_uv4_hub()) { + mnp->m_val = 0; + mnp->n_lshift = 0; + } else if (is_uv3_hub()) { + mnp->m_val = m_n_config.s3.m_skt; + m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG); + mnp->n_lshift = m_gr_config.s3.m_skt; + } else if (is_uv2_hub()) { + mnp->m_val = m_n_config.s2.m_skt; + mnp->n_lshift = mnp->m_val == 40 ? 40 : 39; + } else if (is_uv1_hub()) { + mnp->m_val = m_n_config.s1.m_skt; + mnp->n_lshift = mnp->m_val; } - pr_info("UV: Found %s hub\n", hub); + mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0; +} - map_low_mmrs(); +void __init uv_init_hub_info(struct uv_hub_info_s *hub_info) +{ + struct mn mn = {0}; /* avoid unitialized warnings */ + union uvh_node_id_u node_id; - m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); - m_val = m_n_config.s.m_skt; - n_val = m_n_config.s.n_skt; - pnode_mask = (1 << n_val) - 1; - n_lshift = get_n_lshift(m_val); - mmr_base = - uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & - ~UV_MMR_ENABLE; + get_mn(&mn); + hub_info->m_val = mn.m_val; + hub_info->n_val = mn.n_val; + hub_info->m_shift = mn.m_shift; + hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0; + + hub_info->hub_revision = uv_hub_info->hub_revision; + hub_info->pnode_mask = uv_cpuid.pnode_mask; + hub_info->min_pnode = _min_pnode; + hub_info->min_socket = _min_socket; + hub_info->pnode_to_socket = _pnode_to_socket; + hub_info->socket_to_node = _socket_to_node; + hub_info->socket_to_pnode = _socket_to_pnode; + hub_info->gr_table_len = _gr_table_len; + hub_info->gr_table = _gr_table; + hub_info->gpa_mask = mn.m_val ? + (1UL << (mn.m_val + mn.n_val)) - 1 : + (1UL << uv_cpuid.gpa_shift) - 1; node_id.v = uv_read_local_mmr(UVH_NODE_ID); - gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; - gnode_upper = ((unsigned long)gnode_extra << m_val); - pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n", - n_val, m_val, pnode_mask, gnode_upper, gnode_extra, - n_lshift); + hub_info->gnode_extra = + (node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1; + + hub_info->gnode_upper = + ((unsigned long)hub_info->gnode_extra << mn.m_val); + + if (uv_gp_table) { + hub_info->global_mmr_base = uv_gp_table->mmr_base; + hub_info->global_mmr_shift = uv_gp_table->mmr_shift; + hub_info->global_gru_base = uv_gp_table->gru_base; + hub_info->global_gru_shift = uv_gp_table->gru_shift; + hub_info->gpa_shift = uv_gp_table->gpa_shift; + hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1; + } else { + hub_info->global_mmr_base = + uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & + ~UV_MMR_ENABLE; + hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT; + } - pr_info("UV: global MMR base 0x%lx\n", mmr_base); + get_lowmem_redirect( + &hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top); - for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) - uv_possible_blades += - hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); + hub_info->apic_pnode_shift = uv_cpuid.socketid_shift; - /* uv_num_possible_blades() is really the hub count */ - pr_info("UV: Found %d blades, %d hubs\n", - is_uv1_hub() ? uv_num_possible_blades() : - (uv_num_possible_blades() + 1) / 2, - uv_num_possible_blades()); + /* show system specific info */ + pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", + hub_info->n_val, hub_info->m_val, + hub_info->m_shift, hub_info->n_lshift); - bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!uv_blade_info); + pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", + hub_info->gpa_mask, hub_info->gpa_shift, + hub_info->pnode_mask, hub_info->apic_pnode_shift); - for (blade = 0; blade < uv_num_possible_blades(); blade++) - uv_blade_info[blade].memory_nid = -1; + pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", + hub_info->global_mmr_base, hub_info->global_mmr_shift, + hub_info->global_gru_base, hub_info->global_gru_shift); - get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); + pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", + hub_info->gnode_upper, hub_info->gnode_extra); +} + +static void __init decode_gam_params(unsigned long ptr) +{ + uv_gp_table = (struct uv_gam_parameters *)ptr; + + pr_info("UV: GAM Params...\n"); + pr_info("UV: mmr_base/shift:0x%llx/%d gru_base/shift:0x%llx/%d gpa_shift:%d\n", + uv_gp_table->mmr_base, uv_gp_table->mmr_shift, + uv_gp_table->gru_base, uv_gp_table->gru_shift, + uv_gp_table->gpa_shift); +} + +static void __init decode_gam_rng_tbl(unsigned long ptr) +{ + struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; + unsigned long lgre = 0; + int index = 0; + int sock_min = 999999, pnode_min = 99999; + int sock_max = -1, pnode_max = -1; + + uv_gre_table = gre; + for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { + if (!index) { + pr_info("UV: GAM Range Table...\n"); + pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n", + "Range", "", "Size", "Type", "NASID", + "SID", "PN", "PXM"); + } + pr_info( + "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n", + index++, + (unsigned long)lgre << UV_GAM_RANGE_SHFT, + (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, + ((unsigned long)(gre->limit - lgre)) >> + (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */ + gre->type, gre->nasid, gre->sockid, + gre->pnode, gre->pxm); + + lgre = gre->limit; + if (sock_min > gre->sockid) + sock_min = gre->sockid; + if (sock_max < gre->sockid) + sock_max = gre->sockid; + if (pnode_min > gre->pnode) + pnode_min = gre->pnode; + if (pnode_max < gre->pnode) + pnode_max = gre->pnode; + } + _min_socket = sock_min; + _max_socket = sock_max; + _min_pnode = pnode_min; + _max_pnode = pnode_max; + _gr_table_len = index; + pr_info( + "UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", + index, _min_socket, _max_socket, _min_pnode, _max_pnode); +} + +static void __init decode_uv_systab(void) +{ + struct uv_systab *st; + int i; + + st = uv_systab; + if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub()) + return; + if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) { + pr_crit( + "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", + st->revision, UV_SYSTAB_VERSION_UV4_LATEST); + BUG(); + } + + for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) { + unsigned long ptr = st->entry[i].offset; - bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); - uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!uv_node_to_blade); - memset(uv_node_to_blade, 255, bytes); + if (!ptr) + continue; + + ptr = ptr + (unsigned long)st; + + switch (st->entry[i].type) { + case UV_SYSTAB_TYPE_GAM_PARAMS: + decode_gam_params(ptr); + break; + + case UV_SYSTAB_TYPE_GAM_RNG_TBL: + decode_gam_rng_tbl(ptr); + break; + } + } +} - bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); - uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!uv_cpu_to_blade); - memset(uv_cpu_to_blade, 255, bytes); +/* + * Setup physical blade translations from UVH_NODE_PRESENT_TABLE + * .. NB: UVH_NODE_PRESENT_TABLE is going away, + * .. being replaced by GAM Range Table + */ +static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) +{ + int i, uv_pb = 0; - blade = 0; + pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH); for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { - present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); - for (j = 0; j < 64; j++) { - if (!test_bit(j, &present)) - continue; - pnode = (i * 64 + j) & pnode_mask; - uv_blade_info[blade].pnode = pnode; - uv_blade_info[blade].nr_possible_cpus = 0; - uv_blade_info[blade].nr_online_cpus = 0; - spin_lock_init(&uv_blade_info[blade].nmi_lock); - min_pnode = min(pnode, min_pnode); - max_pnode = max(pnode, max_pnode); - blade++; + unsigned long np; + + np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); + if (np) + pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np); + + uv_pb += hweight64(np); + } + if (uv_possible_blades != uv_pb) + uv_possible_blades = uv_pb; +} + +static void __init build_socket_tables(void) +{ + struct uv_gam_range_entry *gre = uv_gre_table; + int num, nump; + int cpu, i, lnid; + int minsock = _min_socket; + int maxsock = _max_socket; + int minpnode = _min_pnode; + int maxpnode = _max_pnode; + size_t bytes; + + if (!gre) { + if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) { + pr_info("UV: No UVsystab socket table, ignoring\n"); + return; /* not required */ } + pr_crit( + "UV: Error: UVsystab address translations not available!\n"); + BUG(); + } + + /* build socket id -> node id, pnode */ + num = maxsock - minsock + 1; + bytes = num * sizeof(_socket_to_node[0]); + _socket_to_node = kmalloc(bytes, GFP_KERNEL); + _socket_to_pnode = kmalloc(bytes, GFP_KERNEL); + + nump = maxpnode - minpnode + 1; + bytes = nump * sizeof(_pnode_to_socket[0]); + _pnode_to_socket = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); + + for (i = 0; i < num; i++) + _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY; + + for (i = 0; i < nump; i++) + _pnode_to_socket[i] = SOCK_EMPTY; + + /* fill in pnode/node/addr conversion list values */ + pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n"); + for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { + if (gre->type == UV_GAM_RANGE_TYPE_HOLE) + continue; + i = gre->sockid - minsock; + if (_socket_to_pnode[i] != SOCK_EMPTY) + continue; /* duplicate */ + _socket_to_pnode[i] = gre->pnode; + _socket_to_node[i] = gre->pxm; + + i = gre->pnode - minpnode; + _pnode_to_socket[i] = gre->sockid; + + pr_info( + "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n", + gre->sockid, gre->type, gre->nasid, + _socket_to_pnode[gre->sockid - minsock], + _socket_to_node[gre->sockid - minsock], + _pnode_to_socket[gre->pnode - minpnode]); } - uv_bios_init(); + /* check socket -> node values */ + lnid = -1; + for_each_present_cpu(cpu) { + int nid = cpu_to_node(cpu); + int apicid, sockid; + + if (lnid == nid) + continue; + lnid = nid; + apicid = per_cpu(x86_cpu_to_apicid, cpu); + sockid = apicid >> uv_cpuid.socketid_shift; + i = sockid - minsock; + + if (nid != _socket_to_node[i]) { + pr_warn( + "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n", + i, sockid, gre->type, _socket_to_node[i], nid); + _socket_to_node[i] = nid; + } + } + + /* Setup physical blade to pnode translation from GAM Range Table */ + bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); + _node_to_pnode = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!_node_to_pnode); + + for (lnid = 0; lnid < num_possible_nodes(); lnid++) { + unsigned short sockid; + + for (sockid = minsock; sockid <= maxsock; sockid++) { + if (lnid == _socket_to_node[sockid - minsock]) { + _node_to_pnode[lnid] = + _socket_to_pnode[sockid - minsock]; + break; + } + } + if (sockid > maxsock) { + pr_err("UV: socket for node %d not found!\n", lnid); + BUG(); + } + } + + /* + * If socket id == pnode or socket id == node for all nodes, + * system runs faster by removing corresponding conversion table. + */ + pr_info("UV: Checking socket->node/pnode for identity maps\n"); + if (minsock == 0) { + for (i = 0; i < num; i++) + if (_socket_to_node[i] == SOCK_EMPTY || + i != _socket_to_node[i]) + break; + if (i >= num) { + kfree(_socket_to_node); + _socket_to_node = NULL; + pr_info("UV: 1:1 socket_to_node table removed\n"); + } + } + if (minsock == minpnode) { + for (i = 0; i < num; i++) + if (_socket_to_pnode[i] != SOCK_EMPTY && + _socket_to_pnode[i] != i + minpnode) + break; + if (i >= num) { + kfree(_socket_to_pnode); + _socket_to_pnode = NULL; + pr_info("UV: 1:1 socket_to_pnode table removed\n"); + } + } +} + +void __init uv_system_init(void) +{ + struct uv_hub_info_s hub_info = {0}; + int bytes, cpu, nodeid; + unsigned short min_pnode = 9999, max_pnode = 0; + char *hub = is_uv4_hub() ? "UV400" : + is_uv3_hub() ? "UV300" : + is_uv2_hub() ? "UV2000/3000" : + is_uv1_hub() ? "UV100/1000" : NULL; + + if (!hub) { + pr_err("UV: Unknown/unsupported UV hub\n"); + return; + } + pr_info("UV: Found %s hub\n", hub); + + map_low_mmrs(); + + uv_bios_init(); /* get uv_systab for decoding */ + decode_uv_systab(); + build_socket_tables(); + build_uv_gr_table(); + uv_init_hub_info(&hub_info); + uv_possible_blades = num_possible_nodes(); + if (!_node_to_pnode) + boot_init_possible_blades(&hub_info); + + /* uv_num_possible_blades() is really the hub count */ + pr_info("UV: Found %d hubs, %d nodes, %d cpus\n", + uv_num_possible_blades(), + num_possible_nodes(), + num_possible_cpus()); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, &sn_region_size, &system_serial_number); + hub_info.coherency_domain_number = sn_coherency_id; uv_rtc_init(); - for_each_present_cpu(cpu) { - int apicid = per_cpu(x86_cpu_to_apicid, cpu); + bytes = sizeof(void *) * uv_num_possible_blades(); + __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); + BUG_ON(!__uv_hub_info_list); - nid = cpu_to_node(cpu); - /* - * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); - */ - uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; - uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; - uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; + bytes = sizeof(struct uv_hub_info_s); + for_each_node(nodeid) { + struct uv_hub_info_s *new_hub; - uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; - uv_cpu_hub_info(cpu)->n_lshift = n_lshift; + if (__uv_hub_info_list[nodeid]) { + pr_err("UV: Node %d UV HUB already initialized!?\n", + nodeid); + BUG(); + } + + /* Allocate new per hub info list */ + new_hub = (nodeid == 0) ? + &uv_hub_info_node0 : + kzalloc_node(bytes, GFP_KERNEL, nodeid); + BUG_ON(!new_hub); + __uv_hub_info_list[nodeid] = new_hub; + new_hub = uv_hub_info_list(nodeid); + BUG_ON(!new_hub); + *new_hub = hub_info; + + /* Use information from GAM table if available */ + if (_node_to_pnode) + new_hub->pnode = _node_to_pnode[nodeid]; + else /* Fill in during cpu loop */ + new_hub->pnode = 0xffff; + new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); + new_hub->memory_nid = -1; + new_hub->nr_possible_cpus = 0; + new_hub->nr_online_cpus = 0; + } + /* Initialize per cpu info */ + for_each_possible_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + int numa_node_id; + unsigned short pnode; + + nodeid = cpu_to_node(cpu); + numa_node_id = numa_cpu_node(cpu); pnode = uv_apicid_to_pnode(apicid); - blade = boot_pnode_to_blade(pnode); - lcpu = uv_blade_info[blade].nr_possible_cpus; - uv_blade_info[blade].nr_possible_cpus++; - - /* Any node on the blade, else will contain -1. */ - uv_blade_info[blade].memory_nid = nid; - - uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; - uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; - uv_cpu_hub_info(cpu)->m_val = m_val; - uv_cpu_hub_info(cpu)->n_val = n_val; - uv_cpu_hub_info(cpu)->numa_blade_id = blade; - uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; - uv_cpu_hub_info(cpu)->pnode = pnode; - uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; - uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; - uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; - uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; - uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); - uv_node_to_blade[nid] = blade; - uv_cpu_to_blade[cpu] = blade; + + uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); + uv_cpu_info_per(cpu)->blade_cpu_id = + uv_cpu_hub_info(cpu)->nr_possible_cpus++; + if (uv_cpu_hub_info(cpu)->memory_nid == -1) + uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); + if (nodeid != numa_node_id && /* init memoryless node */ + uv_hub_info_list(numa_node_id)->pnode == 0xffff) + uv_hub_info_list(numa_node_id)->pnode = pnode; + else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) + uv_cpu_hub_info(cpu)->pnode = pnode; + uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid); } - /* Add blade/pnode info for nodes without cpus */ - for_each_online_node(nid) { - if (uv_node_to_blade[nid] >= 0) - continue; - paddr = node_start_pfn(nid) << PAGE_SHIFT; - pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); - blade = boot_pnode_to_blade(pnode); - uv_node_to_blade[nid] = blade; + for_each_node(nodeid) { + unsigned short pnode = uv_hub_info_list(nodeid)->pnode; + + /* Add pnode info for pre-GAM list nodes without cpus */ + if (pnode == 0xffff) { + unsigned long paddr; + + paddr = node_start_pfn(nodeid) << PAGE_SHIFT; + pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); + uv_hub_info_list(nodeid)->pnode = pnode; + } + min_pnode = min(pnode, min_pnode); + max_pnode = max(pnode, max_pnode); + pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", + nodeid, + uv_hub_info_list(nodeid)->pnode, + uv_hub_info_list(nodeid)->nr_possible_cpus); } + pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); map_gru_high(max_pnode); map_mmr_high(max_pnode); map_mmioh_high(min_pnode, max_pnode); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9307f182fe30..c7364bd633e1 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2267,7 +2267,7 @@ static int __init apm_init(void) dmi_check_system(apm_dmi_table); - if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { + if (apm_info.bios.version == 0 || machine_is_olpc()) { printk(KERN_INFO "apm: BIOS not found.\n"); return -ENODEV; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 5c042466f274..674134e9f5e5 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -80,6 +80,7 @@ void common(void) { OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); + OFFSET(BP_init_size, boot_params, hdr.init_size); OFFSET(BP_pref_address, boot_params, hdr.pref_address); OFFSET(BP_code32_start, boot_params, hdr.code32_start); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7b76eb67a9b3..c343a54bed39 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c) * can safely set X86_FEATURE_EXTD_APICID unconditionally for families * after 16h. */ - if (cpu_has_apic && c->x86 > 0x16) { - set_cpu_cap(c, X86_FEATURE_EXTD_APICID); - } else if (cpu_has_apic && c->x86 >= 0xf) { - /* check CPU config space for extended APIC ID */ - unsigned int val; - val = read_pci_config(0, 24, 0, 0x68); - if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) + if (boot_cpu_has(X86_FEATURE_APIC)) { + if (c->x86 > 0x16) set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + else if (c->x86 >= 0xf) { + /* check CPU config space for extended APIC ID */ + unsigned int val; + + val = read_pci_config(0, 24, 0, 0x68); + if ((val >> 17 & 0x3) == 0x3) + set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + } } #endif @@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) */ msr_set_bit(MSR_K7_HWCR, 6); #endif + set_cpu_bug(c, X86_BUG_SWAPGS_FENCE); } static void init_amd_gh(struct cpuinfo_x86 *c) @@ -746,7 +750,7 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has_xmm2) { + if (cpu_has(c, X86_FEATURE_XMM2)) { /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8394b3d1f94f..6ef6ed9ccca6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -430,7 +430,7 @@ void load_percpu_segment(int cpu) #ifdef CONFIG_X86_32 loadsegment(fs, __KERNEL_PERCPU); #else - loadsegment(gs, 0); + __loadsegment_simple(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif load_stack_canary_segment(); @@ -717,6 +717,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c) } } + if (c->extended_cpuid_level >= 0x80000007) { + cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + + c->x86_capability[CPUID_8000_0007_EBX] = ebx; + c->x86_power = edx; + } + if (c->extended_cpuid_level >= 0x80000008) { cpuid(0x80000008, &eax, &ebx, &ecx, &edx); @@ -729,9 +736,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_phys_bits = 36; #endif - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - if (c->extended_cpuid_level >= 0x8000000a) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); @@ -862,30 +866,34 @@ static void detect_nopl(struct cpuinfo_x86 *c) #else set_cpu_cap(c, X86_FEATURE_NOPL); #endif +} +static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 /* - * ESPFIX is a strange bug. All real CPUs have it. Paravirt - * systems that run Linux at CPL > 0 may or may not have the - * issue, but, even if they have the issue, there's absolutely - * nothing we can do about it because we can't use the real IRET - * instruction. + * Empirically, writing zero to a segment selector on AMD does + * not clear the base, whereas writing zero to a segment + * selector on Intel does clear the base. Intel's behavior + * allows slightly faster context switches in the common case + * where GS is unused by the prev and next threads. * - * NB: For the time being, only 32-bit kernels support - * X86_BUG_ESPFIX as such. 64-bit kernels directly choose - * whether to apply espfix using paravirt hooks. If any - * non-paravirt system ever shows up that does *not* have the - * ESPFIX issue, we can change this. + * Since neither vendor documents this anywhere that I can see, + * detect it directly instead of hardcoding the choice by + * vendor. + * + * I've designated AMD's behavior as the "bug" because it's + * counterintuitive and less friendly. */ -#ifdef CONFIG_X86_32 -#ifdef CONFIG_PARAVIRT - do { - extern void native_iret(void); - if (pv_cpu_ops.iret == native_iret) - set_cpu_bug(c, X86_BUG_ESPFIX); - } while (0); -#else - set_cpu_bug(c, X86_BUG_ESPFIX); -#endif + + unsigned long old_base, tmp; + rdmsrl(MSR_FS_BASE, old_base); + wrmsrl(MSR_FS_BASE, 1); + loadsegment(fs, 0); + rdmsrl(MSR_FS_BASE, tmp); + if (tmp != 0) + set_cpu_bug(c, X86_BUG_NULL_SEG); + wrmsrl(MSR_FS_BASE, old_base); #endif } @@ -921,6 +929,33 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ detect_nopl(c); + + detect_null_seg_behavior(c); + + /* + * ESPFIX is a strange bug. All real CPUs have it. Paravirt + * systems that run Linux at CPL > 0 may or may not have the + * issue, but, even if they have the issue, there's absolutely + * nothing we can do about it because we can't use the real IRET + * instruction. + * + * NB: For the time being, only 32-bit kernels support + * X86_BUG_ESPFIX as such. 64-bit kernels directly choose + * whether to apply espfix using paravirt hooks. If any + * non-paravirt system ever shows up that does *not* have the + * ESPFIX issue, we can change this. + */ +#ifdef CONFIG_X86_32 +# ifdef CONFIG_PARAVIRT + do { + extern void native_iret(void); + if (pv_cpu_ops.iret == native_iret) + set_cpu_bug(c, X86_BUG_ESPFIX); + } while (0); +# else + set_cpu_bug(c, X86_BUG_ESPFIX); +# endif +#endif } static void x86_init_cache_qos(struct cpuinfo_x86 *c) @@ -1076,12 +1111,12 @@ void enable_sep_cpu(void) struct tss_struct *tss; int cpu; + if (!boot_cpu_has(X86_FEATURE_SEP)) + return; + cpu = get_cpu(); tss = &per_cpu(cpu_tss, cpu); - if (!boot_cpu_has(X86_FEATURE_SEP)) - goto out; - /* * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- * see the big comment in struct x86_hw_tss's definition. @@ -1096,7 +1131,6 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); -out: put_cpu(); } #endif @@ -1528,7 +1562,7 @@ void cpu_init(void) pr_info("Initializing CPU#%d\n", cpu); if (cpu_feature_enabled(X86_FEATURE_VME) || - cpu_has_tsc || + boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6adef9cac23e..bd9dcd6b712d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) switch (dir0_lsn) { case 0xd: /* either a 486SLC or DLC w/o DEVID */ dir0_msn = 0; - p = Cx486_name[(cpu_has_fpu ? 1 : 0)]; + p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)]; break; case 0xe: /* a 486S A step */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1f7fdb91a818..8dae51fd3db1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -152,9 +152,9 @@ static void early_init_intel(struct cpuinfo_x86 *c) * the TLB when any changes are made to any of the page table entries. * The operating system must reload CR3 to cause the TLB to be flushed" * - * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should - * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE - * to be modified + * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h + * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified. */ if (c->x86 == 5 && c->x86_model == 9) { pr_info("Disabling PGE capability bit\n"); @@ -233,7 +233,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * The Quark is also family 5, but does not have the same bug. */ clear_cpu_bug(c, X86_BUG_F00F); - if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) { + if (c->x86 == 5 && c->x86_model < 9) { static int f00f_workaround_enabled; set_cpu_bug(c, X86_BUG_F00F); @@ -281,7 +281,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * integrated APIC (see 11AP erratum in "Pentium Processor * Specification Update"). */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && (c->x86_mask < 0x6 || c->x86_mask == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -336,7 +336,7 @@ static int intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; - if (c->cpuid_level < 4) + if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) return 1; /* Intel has a non-standard dependency on %ecx for this CPUID level. */ @@ -456,7 +456,7 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } - if (cpu_has_xmm2) + if (cpu_has(c, X86_FEATURE_XMM2)) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (boot_cpu_has(X86_FEATURE_DS)) { @@ -468,7 +468,7 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && cpu_has_clflush && + if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 2658e2af74ec..93d824ec3120 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool; static LLIST_HEAD(mce_event_llist); static char gen_pool_buf[MCE_POOLSZ]; +/* + * Compare the record "t" with each of the records on list "l" to see if + * an equivalent one is present in the list. + */ +static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l) +{ + struct mce_evt_llist *node; + struct mce *m1, *m2; + + m1 = &t->mce; + + llist_for_each_entry(node, &l->llnode, llnode) { + m2 = &node->mce; + + if (!mce_cmp(m1, m2)) + return true; + } + return false; +} + +/* + * The system has panicked - we'd like to peruse the list of MCE records + * that have been queued, but not seen by anyone yet. The list is in + * reverse time order, so we need to reverse it. While doing that we can + * also drop duplicate records (these were logged because some banks are + * shared between cores or by all threads on a socket). + */ +struct llist_node *mce_gen_pool_prepare_records(void) +{ + struct llist_node *head; + LLIST_HEAD(new_head); + struct mce_evt_llist *node, *t; + + head = llist_del_all(&mce_event_llist); + if (!head) + return NULL; + + /* squeeze out duplicates while reversing order */ + llist_for_each_entry_safe(node, t, head, llnode) { + if (!is_duplicate_mce_record(node, t)) + llist_add(&node->llnode, &new_head); + } + + return new_head.first; +} + void mce_gen_pool_process(void) { struct llist_node *head; diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 547720efd923..cd74a3f00aea 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -35,6 +35,7 @@ void mce_gen_pool_process(void); bool mce_gen_pool_empty(void); int mce_gen_pool_add(struct mce *mce); int mce_gen_pool_init(void); +struct llist_node *mce_gen_pool_prepare_records(void); extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); @@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id) #endif void mce_inject_log(struct mce *m); + +/* + * We consider records to be equivalent if bank+status+addr+misc all match. + * This is only used when the system is going down because of a fatal error + * to avoid cluttering the console log with essentially repeated information. + * In normal processing all errors seen are logged. + */ +static inline bool mce_cmp(struct mce *m1, struct mce *m2) +{ + return m1->bank != m2->bank || + m1->status != m2->status || + m1->addr != m2->addr || + m1->misc != m2->misc; +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 5119766d9889..631356c8cca4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -204,6 +204,33 @@ static int error_context(struct mce *m) return IN_KERNEL; } +static int mce_severity_amd_smca(struct mce *m, int err_ctx) +{ + u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); + u32 low, high; + + /* + * We need to look at the following bits: + * - "succor" bit (data poisoning support), and + * - TCC bit (Task Context Corrupt) + * in MCi_STATUS to determine error severity. + */ + if (!mce_flags.succor) + return MCE_PANIC_SEVERITY; + + if (rdmsr_safe(addr, &low, &high)) + return MCE_PANIC_SEVERITY; + + /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */ + if ((low & MCI_CONFIG_MCAX) && + (m->status & MCI_STATUS_TCC) && + (err_ctx == IN_KERNEL)) + return MCE_PANIC_SEVERITY; + + /* ...otherwise invoke hwpoison handler. */ + return MCE_AR_SEVERITY; +} + /* * See AMD Error Scope Hierarchy table in a newer BKDG. For example * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" @@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc * to at least kill process to prolong system operation. */ if (mce_flags.overflow_recov) { + if (mce_flags.smca) + return mce_severity_amd_smca(m, ctx); + /* software can try to contain */ if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) return MCE_PANIC_SEVERITY; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f0c921b03e42..92e5e37d97bf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -161,7 +161,6 @@ void mce_log(struct mce *mce) if (!mce_gen_pool_add(mce)) irq_work_queue(&mce_irq_work); - mce->finished = 0; wmb(); for (;;) { entry = mce_log_get_idx_check(mcelog.next); @@ -194,7 +193,6 @@ void mce_log(struct mce *mce) mcelog.entry[entry].finished = 1; wmb(); - mce->finished = 1; set_bit(0, &mce_need_notify); } @@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); +static inline u32 ctl_reg(int bank) +{ + return MSR_IA32_MCx_CTL(bank); +} + +static inline u32 status_reg(int bank) +{ + return MSR_IA32_MCx_STATUS(bank); +} + +static inline u32 addr_reg(int bank) +{ + return MSR_IA32_MCx_ADDR(bank); +} + +static inline u32 misc_reg(int bank) +{ + return MSR_IA32_MCx_MISC(bank); +} + +static inline u32 smca_ctl_reg(int bank) +{ + return MSR_AMD64_SMCA_MCx_CTL(bank); +} + +static inline u32 smca_status_reg(int bank) +{ + return MSR_AMD64_SMCA_MCx_STATUS(bank); +} + +static inline u32 smca_addr_reg(int bank) +{ + return MSR_AMD64_SMCA_MCx_ADDR(bank); +} + +static inline u32 smca_misc_reg(int bank) +{ + return MSR_AMD64_SMCA_MCx_MISC(bank); +} + +struct mca_msr_regs msr_ops = { + .ctl = ctl_reg, + .status = status_reg, + .addr = addr_reg, + .misc = misc_reg +}; + static void print_mce(struct mce *m) { int ret = 0; @@ -290,7 +335,9 @@ static void wait_for_panic(void) static void mce_panic(const char *msg, struct mce *final, char *exp) { - int i, apei_err = 0; + int apei_err = 0; + struct llist_node *pending; + struct mce_evt_llist *l; if (!fake_panic) { /* @@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) if (atomic_inc_return(&mce_fake_panicked) > 1) return; } + pending = mce_gen_pool_prepare_records(); /* First print corrected ones that are still unlogged */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; + llist_for_each_entry(l, pending, llnode) { + struct mce *m = &l->mce; if (!(m->status & MCI_STATUS_UC)) { print_mce(m); if (!apei_err) @@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) } } /* Now print uncorrected but with the final one last */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; + llist_for_each_entry(l, pending, llnode) { + struct mce *m = &l->mce; if (!(m->status & MCI_STATUS_UC)) continue; - if (!final || memcmp(m, final, sizeof(struct mce))) { + if (!final || mce_cmp(m, final)) { print_mce(m); if (!apei_err) apei_err = apei_write_mce(m); @@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr) if (msr == mca_cfg.rip_msr) return offsetof(struct mce, ip); - if (msr == MSR_IA32_MCx_STATUS(bank)) + if (msr == msr_ops.status(bank)) return offsetof(struct mce, status); - if (msr == MSR_IA32_MCx_ADDR(bank)) + if (msr == msr_ops.addr(bank)) return offsetof(struct mce, addr); - if (msr == MSR_IA32_MCx_MISC(bank)) + if (msr == msr_ops.misc(bank)) return offsetof(struct mce, misc); if (msr == MSR_IA32_MCG_STATUS) return offsetof(struct mce, mcgstatus); @@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = { static void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) - m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); + m->misc = mce_rdmsrl(msr_ops.misc(i)); if (m->status & MCI_STATUS_ADDRV) { - m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + m->addr = mce_rdmsrl(msr_ops.addr(i)); /* * Mask the reported address by the reported granularity. @@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.tsc = 0; barrier(); - m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); + m.status = mce_rdmsrl(msr_ops.status(i)); if (!(m.status & MCI_STATUS_VAL)) continue; @@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) /* * Clear state for this bank. */ - mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); + mce_wrmsrl(msr_ops.status(i), 0); } /* @@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, char *tmp; for (i = 0; i < mca_cfg.banks; i++) { - m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); + m->status = mce_rdmsrl(msr_ops.status(i)); if (m->status & MCI_STATUS_VAL) { __set_bit(i, validp); if (quirk_no_way_out) @@ -830,9 +874,9 @@ static int mce_start(int *no_way_out) atomic_add(*no_way_out, &global_nwo); /* - * global_nwo should be updated before mce_callin + * Rely on the implied barrier below, such that global_nwo + * is updated before mce_callin. */ - smp_wmb(); order = atomic_inc_return(&mce_callin); /* @@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear) for (i = 0; i < mca_cfg.banks; i++) { if (test_bit(i, toclear)) - mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); + mce_wrmsrl(msr_ops.status(i), 0); } } @@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) int i; int worst = 0; int severity; + /* * Establish sequential order between the CPUs entering the machine * check handler. */ - int order; + int order = -1; /* * If no_way_out gets set, there is no safe way to recover from this * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. @@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); char *msg = "Unknown"; - int lmce = 0; + + /* + * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES + * on Intel. + */ + int lmce = 1; /* If this CPU is offline, just bail out. */ if (cpu_is_offline(smp_processor_id())) { @@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code) kill_it = 1; /* - * Check if this MCE is signaled to only this logical processor + * Check if this MCE is signaled to only this logical processor, + * on Intel only. */ - if (m.mcgstatus & MCG_STATUS_LMCES) - lmce = 1; - else { - /* - * Go through all the banks in exclusion of the other CPUs. - * This way we don't report duplicated events on shared banks - * because the first one to see it will clear it. - * If this is a Local MCE, then no need to perform rendezvous. - */ + if (m.cpuvendor == X86_VENDOR_INTEL) + lmce = m.mcgstatus & MCG_STATUS_LMCES; + + /* + * Go through all banks in exclusion of the other CPUs. This way we + * don't report duplicated events on shared banks because the first one + * to see it will clear it. If this is a Local MCE, then no need to + * perform rendezvous. + */ + if (!lmce) order = mce_start(&no_way_out); - } for (i = 0; i < cfg->banks; i++) { __clear_bit(i, toclear); @@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) m.addr = 0; m.bank = i; - m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); + m.status = mce_rdmsrl(msr_ops.status(i)); if ((m.status & MCI_STATUS_VAL) == 0) continue; @@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void) enum mcp_flags m_fl = 0; mce_banks_t all_banks; u64 cap; - int i; if (!mca_cfg.bootlog) m_fl = MCP_DONTLOG; @@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void) rdmsrl(MSR_IA32_MCG_CAP, cap); if (cap & MCG_CTL_P) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); +} + +static void __mcheck_cpu_init_clear_banks(void) +{ + int i; for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; if (!b->init) continue; - wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); - wrmsrl(MSR_IA32_MCx_STATUS(i), 0); + wrmsrl(msr_ops.ctl(i), b->ctl); + wrmsrl(msr_ops.status(i), 0); } } @@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) */ clear_bit(10, (unsigned long *)&mce_banks[4].ctl); } - if (c->x86 <= 17 && cfg->bootlog < 0) { + if (c->x86 < 17 && cfg->bootlog < 0) { /* * Lots of broken BIOS around that don't clear them * by default and leave crap in there. Don't log: @@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) break; case X86_VENDOR_AMD: { - u32 ebx = cpuid_ebx(0x80000007); + mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV); + mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); + mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); - mce_flags.overflow_recov = !!(ebx & BIT(0)); - mce_flags.succor = !!(ebx & BIT(1)); - mce_flags.smca = !!(ebx & BIT(3)); + /* + * Install proper ops for Scalable MCA enabled processors + */ + if (mce_flags.smca) { + msr_ops.ctl = smca_ctl_reg; + msr_ops.status = smca_status_reg; + msr_ops.addr = smca_addr_reg; + msr_ops.misc = smca_misc_reg; + } mce_amd_feature_init(c); break; @@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_clear_banks(); __mcheck_cpu_init_timer(); } @@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void) struct mce_bank *b = &mce_banks[i]; if (b->init) - wrmsrl(MSR_IA32_MCx_CTL(i), 0); + wrmsrl(msr_ops.ctl(i), 0); } return; } @@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void) { __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); + __mcheck_cpu_init_clear_banks(); } static struct syscore_ops mce_syscore_ops = { @@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data) if (!mce_available(raw_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); + __mcheck_cpu_init_clear_banks(); __mcheck_cpu_init_timer(); } @@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h) struct mce_bank *b = &mce_banks[i]; if (b->init) - wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); + wrmsrl(msr_ops.ctl(i), b->ctl); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9d656fd436ef..10b0661651e0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -54,14 +54,6 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -/* - * OS is required to set the MCAX bit to acknowledge that it is now using the - * new MSR ranges and new registers under each bank. It also means that the OS - * will configure deferred errors in the new MCx_CONFIG register. If the bit is - * not set, uncorrectable errors will cause a system panic. - */ -#define SMCA_MCAX_EN_OFF 0x1 - static const char * const th_names[] = { "load_store", "insn_fetch", @@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, /* Fall back to method we used for older processors: */ switch (block) { case 0: - addr = MSR_IA32_MCx_MISC(bank); + addr = msr_ops.misc(bank); break; case 1: offset = ((low & MASK_BLKPTR_LO) >> 21); @@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, int offset, u32 misc_high) { unsigned int cpu = smp_processor_id(); + u32 smca_low, smca_high, smca_addr; struct threshold_block b; int new; @@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, b.interrupt_enable = 1; - if (mce_flags.smca) { - u32 smca_low, smca_high; - u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); + if (!mce_flags.smca) { + new = (misc_high & MASK_LVTOFF_HI) >> 20; + goto set_offset; + } - if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { - smca_high |= SMCA_MCAX_EN_OFF; - wrmsr(smca_addr, smca_low, smca_high); - } + smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); - /* Gather LVT offset for thresholding: */ - if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) - goto out; + if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { + /* + * OS is required to set the MCAX bit to acknowledge that it is + * now using the new MSR ranges and new registers under each + * bank. It also means that the OS will configure deferred + * errors in the new MCx_CONFIG register. If the bit is not set, + * uncorrectable errors will cause a system panic. + * + * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.) + */ + smca_high |= BIT(0); - new = (smca_low & SMCA_THR_LVT_OFF) >> 12; - } else { - new = (misc_high & MASK_LVTOFF_HI) >> 20; + /* + * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR} + * registers with the option of additionally logging to + * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set. + * + * This bit is usually set by BIOS to retain the old behavior + * for OSes that don't use the new registers. Linux supports the + * new registers so let's disable that additional logging here. + * + * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high + * portion of the MSR). + */ + smca_high &= ~BIT(2); + + wrmsr(smca_addr, smca_low, smca_high); } + /* Gather LVT offset for thresholding: */ + if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) + goto out; + + new = (smca_low & SMCA_THR_LVT_OFF) >> 12; + +set_offset: offset = setup_APIC_mce_threshold(offset, new); if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) @@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } -static void __log_error(unsigned int bank, bool threshold_err, u64 misc) +static void +__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) { + u32 msr_status = msr_ops.status(bank); + u32 msr_addr = msr_ops.addr(bank); struct mce m; u64 status; - rdmsrl(MSR_IA32_MCx_STATUS(bank), status); + WARN_ON_ONCE(deferred_err && threshold_err); + + if (deferred_err && mce_flags.smca) { + msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank); + msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank); + } + + rdmsrl(msr_status, status); + if (!(status & MCI_STATUS_VAL)) return; @@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc) m.misc = misc; if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr); + rdmsrl(msr_addr, m.addr); mce_log(&m); - wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); + + wrmsrl(msr_status, 0); } static inline void __smp_deferred_error_interrupt(void) @@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void) /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { - u64 status; unsigned int bank; + u32 msr_status; + u64 status; for (bank = 0; bank < mca_cfg.banks; ++bank) { - rdmsrl(MSR_IA32_MCx_STATUS(bank), status); + msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank) + : msr_ops.status(bank); + + rdmsrl(msr_status, status); if (!(status & MCI_STATUS_VAL) || !(status & MCI_STATUS_DEFERRED)) continue; - __log_error(bank, false, 0); + __log_error(bank, true, false, 0); break; } } @@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void) return; log: - __log_error(bank, true, ((u64)high << 32) | low); + __log_error(bank, false, true, ((u64)high << 32) | low); } /* diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1e8bb6c94f14..1defb8ea882c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -84,7 +84,7 @@ static int cmci_supported(int *banks) */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; - if (!cpu_has_apic || lapic_get_maxlvt() < 6) + if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) return 0; rdmsrl(MSR_IA32_MCG_CAP, cap); *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index ac780cad3b86..6b9dc4d18ccc 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) /* Thermal monitoring depends on APIC, ACPI and clock modulation */ static int intel_thermal_supported(struct cpuinfo_x86 *c) { - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return 0; if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) return 0; diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index f8c81ba0b465..b1086f79e57e 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -137,7 +137,7 @@ static void prepare_set(void) u32 cr0; /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4 = __read_cr4(); __write_cr4(cr4 & ~X86_CR4_PGE); } @@ -170,7 +170,7 @@ static void post_set(void) write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) __write_cr4(cr4); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 19f57360dfd2..16e37a2581ac 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -444,11 +444,24 @@ static void __init print_mtrr_state(void) pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } +/* PAT setup for BP. We need to go through sync steps here */ +void __init mtrr_bp_pat_init(void) +{ + unsigned long flags; + + local_irq_save(flags); + prepare_set(); + + pat_init(); + + post_set(); + local_irq_restore(flags); +} + /* Grab all of the MTRR state for this CPU into *state */ bool __init get_mtrr_state(void) { struct mtrr_var_range *vrs; - unsigned long flags; unsigned lo, dummy; unsigned int i; @@ -481,15 +494,6 @@ bool __init get_mtrr_state(void) mtrr_state_set = 1; - /* PAT setup for BP. We need to go through sync steps here */ - local_irq_save(flags); - prepare_set(); - - pat_init(); - - post_set(); - local_irq_restore(flags); - return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); } @@ -741,7 +745,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) wbinvd(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4 = __read_cr4(); __write_cr4(cr4 & ~X86_CR4_PGE); } @@ -771,7 +775,7 @@ static void post_set(void) __releases(set_atomicity_lock) write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) __write_cr4(cr4); raw_spin_unlock(&set_atomicity_lock); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 10f8d4796240..7d393ecdeee6 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -752,6 +752,9 @@ void __init mtrr_bp_init(void) /* BIOS may override */ __mtrr_enabled = get_mtrr_state(); + if (mtrr_enabled()) + mtrr_bp_pat_init(); + if (mtrr_cleanup(phys_addr)) { changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); @@ -759,8 +762,16 @@ void __init mtrr_bp_init(void) } } - if (!mtrr_enabled()) + if (!mtrr_enabled()) { pr_info("MTRR: Disabled\n"); + + /* + * PAT initialization relies on MTRR's rendezvous handler. + * Skip PAT init until the handler can initialize both + * features independently. + */ + pat_disable("MTRRs disabled, skipping PAT initialization too."); + } } void mtrr_ap_init(void) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 951884dcc433..6c7ced07d16d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); bool get_mtrr_state(void); +void mtrr_bp_pat_init(void); extern void set_mtrr_ops(const struct mtrr_ops *ops); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 364e58346897..8cac429b6a1d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -94,7 +94,7 @@ static void __init vmware_platform_setup(void) */ static uint32_t __init vmware_platform(void) { - if (cpu_has_hypervisor) { + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { unsigned int eax; unsigned int hyper_vendor_id[3]; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 1f4acd68b98b..3fe45f84ced4 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void) return; /* Did the boot loader setup the local APIC ? */ - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { if (apic_force_enable(r.start)) return; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8efa57a5f29e..2bb25c3fe2e8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -260,19 +260,12 @@ int __die(const char *str, struct pt_regs *regs, long err) unsigned long sp; #endif printk(KERN_DEFAULT - "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif - if (debug_pagealloc_enabled()) - printk("DEBUG_PAGEALLOC "); -#ifdef CONFIG_KASAN - printk("KASAN"); -#endif - printk("\n"); + "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, + IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", + IS_ENABLED(CONFIG_SMP) ? " SMP" : "", + debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", + IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/ebda.c index 992f442ca155..afe65dffee80 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/ebda.c @@ -38,7 +38,7 @@ void __init reserve_ebda_region(void) * that the paravirt case can handle memory setup * correctly, without our help. */ - if (paravirt_enabled()) + if (!x86_platform.legacy.ebda_search) return; /* end of low (conventional) memory */ diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index dd9ca9b60ff3..aad34aafc0e0 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -21,11 +21,15 @@ static double __initdata y = 3145727.0; * We should really only care about bugs here * anyway. Not features. */ -static void __init check_fpu(void) +void __init fpu__init_check_bugs(void) { u32 cr0_saved; s32 fdiv_bug; + /* kernel_fpu_begin/end() relies on patched alternative instructions. */ + if (!boot_cpu_has(X86_FEATURE_FPU)) + return; + /* We might have CR0::TS set already, clear it: */ cr0_saved = read_cr0(); write_cr0(cr0_saved & ~X86_CR0_TS); @@ -59,13 +63,3 @@ static void __init check_fpu(void) pr_warn("Hmm, FPU with FDIV bug\n"); } } - -void __init fpu__init_check_bugs(void) -{ - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. - */ - if (cpu_has_fpu) - check_fpu(); -} diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8e37cc8a539a..97027545a72d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -217,14 +217,14 @@ static inline void fpstate_init_fstate(struct fregs_state *fp) void fpstate_init(union fpregs_state *state) { - if (!cpu_has_fpu) { + if (!static_cpu_has(X86_FEATURE_FPU)) { fpstate_init_soft(&state->soft); return; } memset(state, 0, xstate_size); - if (cpu_has_fxsr) + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else fpstate_init_fstate(&state->fsave); @@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; - if (!src_fpu->fpstate_active || !cpu_has_fpu) + if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu) * x87 math exception handling: */ -static inline unsigned short get_fpu_cwd(struct fpu *fpu) -{ - if (cpu_has_fxsr) { - return fpu->state.fxsave.cwd; - } else { - return (unsigned short)fpu->state.fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct fpu *fpu) -{ - if (cpu_has_fxsr) { - return fpu->state.fxsave.swd; - } else { - return (unsigned short)fpu->state.fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) -{ - if (cpu_has_xmm) { - return fpu->state.fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - int fpu__exception_code(struct fpu *fpu, int trap_nr) { int err; @@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * so if this combination doesn't produce any single exception, * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception + * fully reproduce the context of the exception. */ - cwd = get_fpu_cwd(fpu); - swd = get_fpu_swd(fpu); + if (boot_cpu_has(X86_FEATURE_FXSR)) { + cwd = fpu->state.fxsave.cwd; + swd = fpu->state.fxsave.swd; + } else { + cwd = (unsigned short)fpu->state.fsave.cwd; + swd = (unsigned short)fpu->state.fsave.swd; + } err = swd & ~cwd; } else { @@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * unmasked exception was caught we must mask the exception mask bits * at 0x1f80, and then use these to mask the exception bits at 0x3f. */ - unsigned short mxcsr = get_fpu_mxcsr(fpu); + unsigned short mxcsr = MXCSR_DEFAULT; + + if (boot_cpu_has(X86_FEATURE_XMM)) + mxcsr = fpu->state.fxsave.mxcsr; + err = ~(mxcsr >> 7) & mxcsr; } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 54c86fffbf9f..aacfd7a82cec 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -29,22 +29,22 @@ static void fpu__init_cpu_generic(void) unsigned long cr0; unsigned long cr4_mask = 0; - if (cpu_has_fxsr) + if (boot_cpu_has(X86_FEATURE_FXSR)) cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) + if (boot_cpu_has(X86_FEATURE_XMM)) cr4_mask |= X86_CR4_OSXMMEXCPT; if (cr4_mask) cr4_set_bits(cr4_mask); cr0 = read_cr0(); cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) + if (!boot_cpu_has(X86_FEATURE_FPU)) cr0 |= X86_CR0_EM; write_cr0(cr0); /* Flush out any pending x87 state: */ #ifdef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) + if (!boot_cpu_has(X86_FEATURE_FPU)) fpstate_init_soft(¤t->thread.fpu.state.soft); else #endif @@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) } #ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { + if (!boot_cpu_has(X86_FEATURE_FPU)) { pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); for (;;) asm volatile("hlt"); @@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void) { unsigned int mask = 0; - if (cpu_has_fxsr) { + if (boot_cpu_has(X86_FEATURE_FXSR)) { /* Static because GCC does not get 16-byte stack alignment right: */ static struct fxregs_state fxregs __initdata; @@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) * fpu__init_system_xstate(). */ - if (!cpu_has_fpu) { + if (!boot_cpu_has(X86_FEATURE_FPU)) { /* * Disable xsave as we do not support it if i387 * emulation is enabled. @@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); xstate_size = sizeof(struct swregs_state); } else { - if (cpu_has_fxsr) + if (boot_cpu_has(X86_FEATURE_FXSR)) xstate_size = sizeof(struct fxregs_state); else xstate_size = sizeof(struct fregs_state); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 8bd1c003942a..81422dfb152b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r { struct fpu *target_fpu = &target->thread.fpu; - return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; + if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) + return regset->n; + else + return 0; } int xfpregs_get(struct task_struct *target, const struct user_regset *regset, @@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, { struct fpu *fpu = &target->thread.fpu; - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; fpu__activate_fpstate_read(fpu); @@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; int ret; - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; fpu__activate_fpstate_write(fpu); @@ -65,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, * update the header bits in the xsave header, indicating the * presence of FP and SSE state. */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; return ret; @@ -79,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, struct xregs_state *xsave; int ret; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; fpu__activate_fpstate_read(fpu); @@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, struct xregs_state *xsave; int ret; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; fpu__activate_fpstate_write(fpu); @@ -275,10 +278,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_read(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) + if (!boot_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpu->state.fsave, 0, -1); @@ -306,10 +309,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_write(fpu); fpstate_sanitize_xstate(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) + if (!boot_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpu->state.fsave, 0, -1); @@ -325,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * update the header bit in the xsave header, indicating the * presence of FP. */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; return ret; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b48ef35b28d4..4ea2a59483c7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ void fpu__init_cpu_xstate(void) { - if (!cpu_has_xsave || !xfeatures_mask) + if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; cr4_set_bits(X86_CR4_OSXSAVE); @@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void) xstate_comp_offsets[0] = 0; xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); - if (!cpu_has_xsaves) { + if (!boot_cpu_has(X86_FEATURE_XSAVES)) { for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (xfeature_enabled(i)) { xstate_comp_offsets[i] = xstate_offsets[i]; @@ -316,13 +316,13 @@ static void __init setup_init_fpu_buf(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return; setup_xstate_features(); print_xstate_features(); - if (cpu_has_xsaves) { + if (boot_cpu_has(X86_FEATURE_XSAVES)) { init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; init_fpstate.xsave.header.xfeatures = xfeatures_mask; } @@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr) */ static int using_compacted_format(void) { - return cpu_has_xsaves; + return boot_cpu_has(X86_FEATURE_XSAVES); } static void __xstate_dump_leaves(void) @@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void) unsigned int eax, ebx, ecx, edx; unsigned int calculated_xstate_size; - if (!cpu_has_xsaves) { + if (!boot_cpu_has(X86_FEATURE_XSAVES)) { /* * - CPUID function 0DH, sub-function 0: * EBX enumerates the size (in bytes) required by @@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - if (!cpu_has_xsave) { + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { pr_info("x86/fpu: Legacy x87 FPU detected.\n"); return; } @@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void) pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, xstate_size, - cpu_has_xsaves ? "compacted" : "standard"); + boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); } /* @@ -678,7 +678,7 @@ void fpu__resume_cpu(void) /* * Restore XCR0 on xsave capable CPUs: */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 2911ef3a9f1c..d784bb547a9d 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -34,6 +34,8 @@ asmlinkage __visible void __init i386_start_kernel(void) cr4_init_shadow(); sanitize_boot_params(&boot_params); + x86_early_init_platform_quirks(); + /* Call the subarch specific early setup function */ switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_INTEL_MID: diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1f4422d5c8d0..b72fb0b71dd1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -182,6 +182,7 @@ void __init x86_64_start_reservations(char *real_mode_data) if (!boot_params.hdr.version) copy_bootdata(__va(real_mode_data)); + x86_early_init_platform_quirks(); reserve_ebda_region(); switch (boot_params.hdr.hardware_subarch) { diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index af1112980dd4..6f8902b0d151 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -555,62 +555,53 @@ early_idt_handler_common: */ cld - cmpl $2,(%esp) # X86_TRAP_NMI - je .Lis_nmi # Ignore NMI - - cmpl $2,%ss:early_recursion_flag - je hlt_loop incl %ss:early_recursion_flag - push %eax # 16(%esp) - push %ecx # 12(%esp) - push %edx # 8(%esp) - push %ds # 4(%esp) - push %es # 0(%esp) - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - - cmpl $(__KERNEL_CS),32(%esp) - jne 10f + /* The vector number is in pt_regs->gs */ - leal 28(%esp),%eax # Pointer to %eip - call early_fixup_exception - andl %eax,%eax - jnz ex_entry /* found an exception entry */ - -10: -#ifdef CONFIG_PRINTK - xorl %eax,%eax - movw %ax,2(%esp) /* clean up the segment values on some cpus */ - movw %ax,6(%esp) - movw %ax,34(%esp) - leal 40(%esp),%eax - pushl %eax /* %esp before the exception */ - pushl %ebx - pushl %ebp - pushl %esi - pushl %edi - movl %cr2,%eax - pushl %eax - pushl (20+6*4)(%esp) /* trapno */ - pushl $fault_msg - call printk -#endif - call dump_stack -hlt_loop: - hlt - jmp hlt_loop - -ex_entry: - pop %es - pop %ds - pop %edx - pop %ecx - pop %eax - decl %ss:early_recursion_flag -.Lis_nmi: - addl $8,%esp /* drop vector number and error code */ + cld + pushl %fs /* pt_regs->fs */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %es /* pt_regs->es */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %ds /* pt_regs->ds */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %eax /* pt_regs->ax */ + pushl %ebp /* pt_regs->bp */ + pushl %edi /* pt_regs->di */ + pushl %esi /* pt_regs->si */ + pushl %edx /* pt_regs->dx */ + pushl %ecx /* pt_regs->cx */ + pushl %ebx /* pt_regs->bx */ + + /* Fix up DS and ES */ + movl $(__KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + + /* Load the vector number into EDX */ + movl PT_GS(%esp), %edx + + /* Load GS into pt_regs->gs and clear high bits */ + movw %gs, PT_GS(%esp) + movw $0, PT_GS+2(%esp) + + movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */ + call early_fixup_exception + + popl %ebx /* pt_regs->bx */ + popl %ecx /* pt_regs->cx */ + popl %edx /* pt_regs->dx */ + popl %esi /* pt_regs->si */ + popl %edi /* pt_regs->di */ + popl %ebp /* pt_regs->bp */ + popl %eax /* pt_regs->ax */ + popl %ds /* pt_regs->ds */ + popl %es /* pt_regs->es */ + popl %fs /* pt_regs->fs */ + popl %gs /* pt_regs->gs */ + decl %ss:early_recursion_flag + addl $4, %esp /* pop pt_regs->orig_ax */ iret ENDPROC(early_idt_handler_common) @@ -647,10 +638,14 @@ ignore_int: popl %eax #endif iret + +hlt_loop: + hlt + jmp hlt_loop ENDPROC(ignore_int) __INITDATA .align 4 -early_recursion_flag: +GLOBAL(early_recursion_flag) .long 0 __REFDATA @@ -715,19 +710,6 @@ __INITRODATA int_msg: .asciz "Unknown interrupt or fault at: %p %p %p\n" -fault_msg: -/* fault info: */ - .ascii "BUG: Int %d: CR2 %p\n" -/* regs pushed in early_idt_handler: */ - .ascii " EDI %p ESI %p EBP %p EBX %p\n" - .ascii " ESP %p ES %p DS %p\n" - .ascii " EDX %p ECX %p EAX %p\n" -/* fault frame: */ - .ascii " vec %p err %p EIP %p CS %p flg %p\n" - .ascii "Stack: %p %p %p %p %p %p %p %p\n" - .ascii " %p %p %p %p %p %p %p %p\n" - .asciz " %p %p %p %p %p %p %p %p\n" - #include "../../x86/xen/xen-head.S" /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22fbf9df61bb..5df831ef1442 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -20,6 +20,7 @@ #include <asm/processor-flags.h> #include <asm/percpu.h> #include <asm/nops.h> +#include "../entry/calling.h" #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -64,6 +65,14 @@ startup_64: * tables and then reload them. */ + /* + * Setup stack for verify_cpu(). "-8" because stack_start is defined + * this way, see below. Our best guess is a NULL ptr for stack + * termination heuristics and we don't want to break anything which + * might depend on it (kgdb, ...). + */ + leaq (__end_init_task - 8)(%rip), %rsp + /* Sanitize CPU configuration */ call verify_cpu @@ -350,90 +359,48 @@ early_idt_handler_common: */ cld - cmpl $2,(%rsp) # X86_TRAP_NMI - je .Lis_nmi # Ignore NMI - - cmpl $2,early_recursion_flag(%rip) - jz 1f incl early_recursion_flag(%rip) - pushq %rax # 64(%rsp) - pushq %rcx # 56(%rsp) - pushq %rdx # 48(%rsp) - pushq %rsi # 40(%rsp) - pushq %rdi # 32(%rsp) - pushq %r8 # 24(%rsp) - pushq %r9 # 16(%rsp) - pushq %r10 # 8(%rsp) - pushq %r11 # 0(%rsp) - - cmpl $__KERNEL_CS,96(%rsp) - jne 11f - - cmpl $14,72(%rsp) # Page fault? + /* The vector number is currently in the pt_regs->di slot. */ + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* RSI = vector number */ + movq %rdi, 8(%rsp) /* pt_regs->di = RDI */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->bx */ + pushq %rbp /* pt_regs->bp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + cmpq $14,%rsi /* Page fault? */ jnz 10f - GET_CR2_INTO(%rdi) # can clobber any volatile register if pv + GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ call early_make_pgtable andl %eax,%eax - jz 20f # All good + jz 20f /* All good */ 10: - leaq 88(%rsp),%rdi # Pointer to %rip + movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ call early_fixup_exception - andl %eax,%eax - jnz 20f # Found an exception entry - -11: -#ifdef CONFIG_EARLY_PRINTK - GET_CR2_INTO(%r9) # can clobber any volatile register if pv - movl 80(%rsp),%r8d # error code - movl 72(%rsp),%esi # vector number - movl 96(%rsp),%edx # %cs - movq 88(%rsp),%rcx # %rip - xorl %eax,%eax - leaq early_idt_msg(%rip),%rdi - call early_printk - cmpl $2,early_recursion_flag(%rip) - jz 1f - call dump_stack -#ifdef CONFIG_KALLSYMS - leaq early_idt_ripmsg(%rip),%rdi - movq 40(%rsp),%rsi # %rip again - call __print_symbol -#endif -#endif /* EARLY_PRINTK */ -1: hlt - jmp 1b - -20: # Exception table entry found or page table generated - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rdi - popq %rsi - popq %rdx - popq %rcx - popq %rax + +20: decl early_recursion_flag(%rip) -.Lis_nmi: - addq $16,%rsp # drop vector number and error code - INTERRUPT_RETURN + jmp restore_regs_and_iret ENDPROC(early_idt_handler_common) __INITDATA .balign 4 -early_recursion_flag: +GLOBAL(early_recursion_flag) .long 0 -#ifdef CONFIG_EARLY_PRINTK -early_idt_msg: - .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" -early_idt_ripmsg: - .asciz "RIP %s\n" -#endif /* CONFIG_EARLY_PRINTK */ - #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ GLOBAL(name) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a1f0e4a5c47e..f112af7aa62e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -54,7 +54,7 @@ struct hpet_dev { char name[10]; }; -inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) +static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) { return container_of(evtdev, struct hpet_dev, evt); } @@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, - .archdata = { .vclock_mode = VCLOCK_HPET }, }; static int hpet_clocksource_register(void) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e565e0e4d216..fc25f698d792 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -13,6 +13,7 @@ #include <linux/cpu.h> #include <asm/kprobes.h> #include <asm/alternative.h> +#include <asm/text-patching.h> #ifdef HAVE_JUMP_LABEL diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 2da6ee9ae69b..04cde527d728 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -45,6 +45,7 @@ #include <linux/uaccess.h> #include <linux/memory.h> +#include <asm/text-patching.h> #include <asm/debugreg.h> #include <asm/apicdef.h> #include <asm/apic.h> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index ae703acb85c1..38cf7a741250 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -51,6 +51,7 @@ #include <linux/ftrace.h> #include <linux/frame.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7b3b9d15c47a..4425f593f0ec 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -29,6 +29,7 @@ #include <linux/kallsyms.h> #include <linux/ftrace.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 807950860fb7..eea2a6f72b31 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -285,14 +285,6 @@ static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - /* - * KVM isn't paravirt in the sense of paravirt_enabled. A KVM - * guest kernel works like a bare metal kernel with additional - * features, and paravirt_enabled is about features that are - * missing. - */ - pv_info.paravirt_enabled = 0; - if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; @@ -522,7 +514,7 @@ static noinline uint32_t __kvm_cpuid_base(void) if (boot_cpu_data.cpuid_level < 0) return 0; /* So we don't blow up on old processors */ - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); return 0; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 005c03e93fc5..477ae806c2fa 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -31,6 +31,7 @@ #include <linux/jump_label.h> #include <linux/random.h> +#include <asm/text-patching.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/setup.h> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f08ac28b8136..7b3b3f24c3ea 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -294,7 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) struct pv_info pv_info = { .name = "bare hardware", - .paravirt_enabled = 0, .kernel_rpl = 0, .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ @@ -339,8 +338,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .write_cr8 = native_write_cr8, #endif .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, + .read_msr = native_read_msr, + .write_msr = native_write_msr, + .read_msr_safe = native_read_msr_safe, + .write_msr_safe = native_write_msr_safe, .read_pmc = native_read_pmc, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 35ccf75696eb..f712dfdf1357 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c @@ -72,7 +72,7 @@ void __init check_iommu_entries(struct iommu_table_entry *start, } } #else -inline void check_iommu_entries(struct iommu_table_entry *start, +void __init check_iommu_entries(struct iommu_table_entry *start, struct iommu_table_entry *finish) { } diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c new file mode 100644 index 000000000000..b2f8a33b36ff --- /dev/null +++ b/arch/x86/kernel/platform-quirks.c @@ -0,0 +1,35 @@ +#include <linux/kernel.h> +#include <linux/init.h> + +#include <asm/setup.h> +#include <asm/bios_ebda.h> + +void __init x86_early_init_platform_quirks(void) +{ + x86_platform.legacy.rtc = 1; + x86_platform.legacy.ebda_search = 0; + x86_platform.legacy.devices.pnpbios = 1; + + switch (boot_params.hdr.hardware_subarch) { + case X86_SUBARCH_PC: + x86_platform.legacy.ebda_search = 1; + break; + case X86_SUBARCH_XEN: + case X86_SUBARCH_LGUEST: + case X86_SUBARCH_INTEL_MID: + case X86_SUBARCH_CE4100: + x86_platform.legacy.devices.pnpbios = 0; + x86_platform.legacy.rtc = 0; + break; + } + + if (x86_platform.set_legacy_features) + x86_platform.set_legacy_features(); +} + +#if defined(CONFIG_PNPBIOS) +bool __init arch_pnpbios_disabled(void) +{ + return x86_platform.legacy.devices.pnpbios == 0; +} +#endif diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6cbab31ac23a..6b16c36f0939 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task) } } -static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) -{ - struct user_desc ud = { - .base_addr = addr, - .limit = 0xfffff, - .seg_32bit = 1, - .limit_in_pages = 1, - .useable = 1, - }; - struct desc_struct *desc = t->thread.tls_array; - desc += tls; - fill_ldt(desc, &ud); -} - -static inline u32 read_32bit_tls(struct task_struct *t, int tls) -{ - return get_desc_base(&t->thread.tls_array[tls]); -} - int copy_thread_tls(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p, unsigned long tls) { @@ -169,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); - p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; + p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; savesegment(fs, p->thread.fsindex); - p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; + p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -210,7 +191,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, */ if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION - if (is_ia32_task()) + if (in_ia32_syscall()) err = do_set_thread_area(p, -1, (struct user_desc __user *)tls, 0); else @@ -282,7 +263,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); - unsigned fsindex, gsindex; + unsigned prev_fsindex, prev_gsindex; fpu_switch_t fpu_switch; fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); @@ -292,8 +273,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * * (e.g. xen_load_tls()) */ - savesegment(fs, fsindex); - savesegment(gs, gsindex); + savesegment(fs, prev_fsindex); + savesegment(gs, prev_gsindex); /* * Load TLS before restoring any segments so that segment loads @@ -336,66 +317,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch FS and GS. * * These are even more complicated than DS and ES: they have - * 64-bit bases are that controlled by arch_prctl. Those bases - * only differ from the values in the GDT or LDT if the selector - * is 0. - * - * Loading the segment register resets the hidden base part of - * the register to 0 or the value from the GDT / LDT. If the - * next base address zero, writing 0 to the segment register is - * much faster than using wrmsr to explicitly zero the base. - * - * The thread_struct.fs and thread_struct.gs values are 0 - * if the fs and gs bases respectively are not overridden - * from the values implied by fsindex and gsindex. They - * are nonzero, and store the nonzero base addresses, if - * the bases are overridden. - * - * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should - * be impossible. - * - * Therefore we need to reload the segment registers if either - * the old or new selector is nonzero, and we need to override - * the base address if next thread expects it to be overridden. + * 64-bit bases are that controlled by arch_prctl. The bases + * don't necessarily match the selectors, as user code can do + * any number of things to cause them to be inconsistent. * - * This code is unnecessarily slow in the case where the old and - * new indexes are zero and the new base is nonzero -- it will - * unnecessarily write 0 to the selector before writing the new - * base address. + * We don't promise to preserve the bases if the selectors are + * nonzero. We also don't promise to preserve the base if the + * selector is zero and the base doesn't match whatever was + * most recently passed to ARCH_SET_FS/GS. (If/when the + * FSGSBASE instructions are enabled, we'll need to offer + * stronger guarantees.) * - * Note: This all depends on arch_prctl being the only way that - * user code can override the segment base. Once wrfsbase and - * wrgsbase are enabled, most of this code will need to change. + * As an invariant, + * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is + * impossible. */ - if (unlikely(fsindex | next->fsindex | prev->fs)) { + if (next->fsindex) { + /* Loading a nonzero value into FS sets the index and base. */ loadsegment(fs, next->fsindex); - - /* - * If user code wrote a nonzero value to FS, then it also - * cleared the overridden base address. - * - * XXX: if user code wrote 0 to FS and cleared the base - * address itself, we won't notice and we'll incorrectly - * restore the prior base address next time we reschdule - * the process. - */ - if (fsindex) - prev->fs = 0; + } else { + if (next->fsbase) { + /* Next index is zero but next base is nonzero. */ + if (prev_fsindex) + loadsegment(fs, 0); + wrmsrl(MSR_FS_BASE, next->fsbase); + } else { + /* Next base and index are both zero. */ + if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { + /* + * We don't know the previous base and can't + * find out without RDMSR. Forcibly clear it. + */ + loadsegment(fs, __USER_DS); + loadsegment(fs, 0); + } else { + /* + * If the previous index is zero and ARCH_SET_FS + * didn't change the base, then the base is + * also zero and we don't need to do anything. + */ + if (prev->fsbase || prev_fsindex) + loadsegment(fs, 0); + } + } } - if (next->fs) - wrmsrl(MSR_FS_BASE, next->fs); - prev->fsindex = fsindex; + /* + * Save the old state and preserve the invariant. + * NB: if prev_fsindex == 0, then we can't reliably learn the base + * without RDMSR because Intel user code can zero it without telling + * us and AMD user code can program any 32-bit value without telling + * us. + */ + if (prev_fsindex) + prev->fsbase = 0; + prev->fsindex = prev_fsindex; - if (unlikely(gsindex | next->gsindex | prev->gs)) { + if (next->gsindex) { + /* Loading a nonzero value into GS sets the index and base. */ load_gs_index(next->gsindex); - - /* This works (and fails) the same way as fsindex above. */ - if (gsindex) - prev->gs = 0; + } else { + if (next->gsbase) { + /* Next index is zero but next base is nonzero. */ + if (prev_gsindex) + load_gs_index(0); + wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase); + } else { + /* Next base and index are both zero. */ + if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { + /* + * We don't know the previous base and can't + * find out without RDMSR. Forcibly clear it. + * + * This contains a pointless SWAPGS pair. + * Fixing it would involve an explicit check + * for Xen or a new pvop. + */ + load_gs_index(__USER_DS); + load_gs_index(0); + } else { + /* + * If the previous index is zero and ARCH_SET_GS + * didn't change the base, then the base is + * also zero and we don't need to do anything. + */ + if (prev->gsbase || prev_gsindex) + load_gs_index(0); + } + } } - if (next->gs) - wrmsrl(MSR_KERNEL_GS_BASE, next->gs); - prev->gsindex = gsindex; + /* + * Save the old state and preserve the invariant. + * NB: if prev_gsindex == 0, then we can't reliably learn the base + * without RDMSR because Intel user code can zero it without telling + * us and AMD user code can program any 32-bit value without telling + * us. + */ + if (prev_gsindex) + prev->gsbase = 0; + prev->gsindex = prev_gsindex; switch_fpu_finish(next_fpu, fpu_switch); @@ -516,23 +535,11 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (addr >= TASK_SIZE_OF(task)) return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to - switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); - } - task->thread.gsindex = GS_TLS_SEL; - task->thread.gs = 0; - } else { - task->thread.gsindex = 0; - task->thread.gs = addr; - if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); - } + task->thread.gsindex = 0; + task->thread.gsbase = addr; + if (doit) { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); } put_cpu(); break; @@ -542,52 +549,30 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (addr >= TASK_SIZE_OF(task)) return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to - switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, FS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); - loadsegment(fs, FS_TLS_SEL); - } - task->thread.fsindex = FS_TLS_SEL; - task->thread.fs = 0; - } else { - task->thread.fsindex = 0; - task->thread.fs = addr; - if (doit) { - /* set the selector to 0 to not confuse - __switch_to */ - loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); - } + task->thread.fsindex = 0; + task->thread.fsbase = addr; + if (doit) { + /* set the selector to 0 to not confuse __switch_to */ + loadsegment(fs, 0); + ret = wrmsrl_safe(MSR_FS_BASE, addr); } put_cpu(); break; case ARCH_GET_FS: { unsigned long base; - if (task->thread.fsindex == FS_TLS_SEL) - base = read_32bit_tls(task, FS_TLS); - else if (doit) + if (doit) rdmsrl(MSR_FS_BASE, base); else - base = task->thread.fs; + base = task->thread.fsbase; ret = put_user(base, (unsigned long __user *)addr); break; } case ARCH_GET_GS: { unsigned long base; - unsigned gsindex; - if (task->thread.gsindex == GS_TLS_SEL) - base = read_32bit_tls(task, GS_TLS); - else if (doit) { - savesegment(gs, gsindex); - if (gsindex) - rdmsrl(MSR_KERNEL_GS_BASE, base); - else - base = task->thread.gs; - } else - base = task->thread.gs; + if (doit) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gsbase; ret = put_user(base, (unsigned long __user *)addr); break; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 32e9d9cbb884..e60ef918f53d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task, switch (offset) { case offsetof(struct user_regs_struct,fs): - /* - * If this is setting fs as for normal 64-bit use but - * setting fs_base has implicitly changed it, leave it. - */ - if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && - task->thread.fs != 0) || - (value == 0 && task->thread.fsindex == FS_TLS_SEL && - task->thread.fs == 0)) - break; task->thread.fsindex = value; if (task == current) loadsegment(fs, task->thread.fsindex); break; case offsetof(struct user_regs_struct,gs): - /* - * If this is setting gs as for normal 64-bit use but - * setting gs_base has implicitly changed it, leave it. - */ - if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && - task->thread.gs != 0) || - (value == 0 && task->thread.gsindex == GS_TLS_SEL && - task->thread.gs == 0)) - break; task->thread.gsindex = value; if (task == current) load_gs_index(task->thread.gsindex); @@ -417,7 +399,7 @@ static int putreg(struct task_struct *child, * to set either thread.fs or thread.fsindex and the * corresponding GDT slot. */ - if (child->thread.fs != value) + if (child->thread.fsbase != value) return do_arch_prctl(child, ARCH_SET_FS, value); return 0; case offsetof(struct user_regs_struct,gs_base): @@ -426,7 +408,7 @@ static int putreg(struct task_struct *child, */ if (value >= TASK_SIZE_OF(child)) return -EIO; - if (child->thread.gs != value) + if (child->thread.gsbase != value) return do_arch_prctl(child, ARCH_SET_GS, value); return 0; #endif @@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct, fs_base): { /* - * do_arch_prctl may have used a GDT slot instead of - * the MSR. To userland, it appears the same either - * way, except the %fs segment selector might not be 0. + * XXX: This will not behave as expected if called on + * current or if fsindex != 0. */ - unsigned int seg = task->thread.fsindex; - if (task->thread.fs != 0) - return task->thread.fs; - if (task == current) - asm("movl %%fs,%0" : "=r" (seg)); - if (seg != FS_TLS_SEL) - return 0; - return get_desc_base(&task->thread.tls_array[FS_TLS]); + return task->thread.fsbase; } case offsetof(struct user_regs_struct, gs_base): { /* - * Exactly the same here as the %fs handling above. + * XXX: This will not behave as expected if called on + * current or if fsindex != 0. */ - unsigned int seg = task->thread.gsindex; - if (task->thread.gs != 0) - return task->thread.gs; - if (task == current) - asm("movl %%gs,%0" : "=r" (seg)); - if (seg != GS_TLS_SEL) - return 0; - return get_desc_base(&task->thread.tls_array[GS_TLS]); + return task->thread.gsbase; } #endif } @@ -1266,7 +1234,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { #ifdef CONFIG_X86_X32_ABI - if (!is_ia32_task()) + if (!in_ia32_syscall()) return x32_arch_ptrace(child, request, caddr, cdata); #endif #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index ab0adc0fa5db..a9b31eb815f2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -535,6 +535,15 @@ static void native_machine_emergency_restart(void) mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; *((unsigned short *)__va(0x472)) = mode; + /* + * If an EFI capsule has been registered with the firmware then + * override the reboot= parameter. + */ + if (efi_capsule_pending(NULL)) { + pr_info("EFI capsule is pending, forcing EFI reboot.\n"); + reboot_type = BOOT_EFI; + } + for (;;) { /* Could also try the reset bit in the Hammer NB */ switch (reboot_type) { diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 4af8d063fb36..eceaa082ec3f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -14,6 +14,7 @@ #include <asm/time.h> #include <asm/intel-mid.h> #include <asm/rtc.h> +#include <asm/setup.h> #ifdef CONFIG_X86_32 /* @@ -185,22 +186,7 @@ static __init int add_rtc_cmos(void) } } #endif - if (of_have_populated_dt()) - return 0; - - /* Intel MID platforms don't have ioport rtc */ - if (intel_mid_identify_cpu()) - return -ENODEV; - -#ifdef CONFIG_ACPI - if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { - /* This warning can likely go away again in a year or two. */ - pr_info("ACPI: not registering RTC platform device\n"); - return -ENODEV; - } -#endif - - if (paravirt_enabled() && !paravirt_has(RTC)) + if (!x86_platform.legacy.rtc) return -ENODEV; platform_device_register(&rtc_device); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 548ddf7d6fd2..22cc2f9f8aec 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -248,18 +248,17 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (config_enabled(CONFIG_X86_64)) sp -= 128; - if (!onsigstack) { - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (current->sas_ss_size) - sp = current->sas_ss_sp + current->sas_ss_size; - } else if (config_enabled(CONFIG_X86_32) && - (regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - /* This is the legacy signal stack switching. */ - sp = (unsigned long) ka->sa.sa_restorer; - } + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } else if (config_enabled(CONFIG_X86_32) && + !onsigstack && + (regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + /* This is the legacy signal stack switching. */ + sp = (unsigned long) ka->sa.sa_restorer; } if (fpu->fpstate_active) { @@ -391,7 +390,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(&frame->uc, &frame->puc); /* Create the ucontext. */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); @@ -442,7 +441,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs) { unsigned long flags; - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; else flags = UC_SIGCONTEXT_SS; @@ -762,7 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_X86_64 - if (is_ia32_task()) + if (in_ia32_syscall()) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a2065d3b3b39..fafe8b923cac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -332,6 +332,11 @@ static void __init smp_init_package_map(void) * primary cores. */ ncpus = boot_cpu_data.x86_max_cores; + if (!ncpus) { + pr_warn("x86_max_cores == zero !?!?"); + ncpus = 1; + } + __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); /* @@ -1231,7 +1236,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * If we couldn't find a local APIC, then get out of here now! */ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && - !cpu_has_apic) { + !boot_cpu_has(X86_FEATURE_APIC)) { if (!disable_apic) { pr_err("BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index 5da924bbf0a0..623965e86b65 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c @@ -68,6 +68,21 @@ struct efifb_dmi_info efifb_dmi_list[] = { [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } }; +void efifb_setup_from_dmi(struct screen_info *si, const char *opt) +{ + int i; + + for (i = 0; i < M_UNKNOWN; i++) { + if (efifb_dmi_list[i].base != 0 && + !strcmp(opt, efifb_dmi_list[i].optname)) { + si->lfb_base = efifb_dmi_list[i].base; + si->lfb_linelength = efifb_dmi_list[i].stride; + si->lfb_width = efifb_dmi_list[i].width; + si->lfb_height = efifb_dmi_list[i].height; + } + } +} + #define choose_value(dmivalue, fwvalue, field, flags) ({ \ typeof(fwvalue) _ret_ = fwvalue; \ if ((flags) & (field)) \ diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index e72a07f20b05..9b0185fbe3eb 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -74,12 +74,6 @@ void __init tboot_probe(void) return; } - /* only a natively booted kernel should be using TXT */ - if (paravirt_enabled()) { - pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); - return; - } - /* Map and check for tboot UUID. */ set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index ab40954e113e..f386bad0984e 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -40,7 +40,7 @@ static inline void flush_tce(void* tceaddr) { /* a single tce can't cross a cache line */ - if (cpu_has_clflush) + if (boot_cpu_has(X86_FEATURE_CLFLUSH)) clflush(tceaddr); else wbinvd(); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 7fc5e843f247..9692a5e9fdab 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx, int can_allocate) { struct user_desc info; + unsigned short __maybe_unused sel, modified_sel; if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; @@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx, set_tls_desc(p, idx, &info, 1); + /* + * If DS, ES, FS, or GS points to the modified segment, forcibly + * refresh it. Only needed on x86_64 because x86_32 reloads them + * on return to user mode. + */ + modified_sel = (idx << 3) | 3; + + if (p == current) { +#ifdef CONFIG_X86_64 + savesegment(ds, sel); + if (sel == modified_sel) + loadsegment(ds, sel); + + savesegment(es, sel); + if (sel == modified_sel) + loadsegment(es, sel); + + savesegment(fs, sel); + if (sel == modified_sel) + loadsegment(fs, sel); + + savesegment(gs, sel); + if (sel == modified_sel) + load_gs_index(sel); +#endif + +#ifdef CONFIG_X86_32_LAZY_GS + savesegment(gs, sel); + if (sel == modified_sel) + loadsegment(gs, sel); +#endif + } else { +#ifdef CONFIG_X86_64 + if (p->thread.fsindex == modified_sel) + p->thread.fsbase = info.base_addr; + + if (p->thread.gsindex == modified_sel) + p->thread.gsbase = info.base_addr; +#endif + } + return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 06cbe25861f1..d1590486204a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -51,6 +51,7 @@ #include <asm/processor.h> #include <asm/debugreg.h> #include <linux/atomic.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c9c4c7ce3eb2..38ba6de56ede 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent - erroneous rdtsc usage on !cpu_has_tsc processors */ + erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */ static int __read_mostly tsc_disabled = -1; static DEFINE_STATIC_KEY_FALSE(__use_tsc); @@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void) #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; - if (cpu_has_tsc) { - tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; - cpu_data(0).loops_per_jiffy = - cpufreq_scale(cpu_data(0).loops_per_jiffy, - cpu_khz_old, cpu_khz); - return 0; - } else + if (!boot_cpu_has(X86_FEATURE_TSC)) return -ENODEV; + + tsc_khz = x86_platform.calibrate_tsc(); + cpu_khz = tsc_khz; + cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, + cpu_khz_old, cpu_khz); + + return 0; #else return -ENODEV; #endif @@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; unsigned long *lpj; - if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - lpj = &boot_cpu_data.loops_per_jiffy; #ifdef CONFIG_SMP if (!(freq->flags & CPUFREQ_CONST_LOOPS)) @@ -954,9 +951,9 @@ static struct notifier_block time_cpufreq_notifier_block = { .notifier_call = time_cpufreq_notifier }; -static int __init cpufreq_tsc(void) +static int __init cpufreq_register_tsc_scaling(void) { - if (!cpu_has_tsc) + if (!boot_cpu_has(X86_FEATURE_TSC)) return 0; if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; @@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void) return 0; } -core_initcall(cpufreq_tsc); +core_initcall(cpufreq_register_tsc_scaling); #endif /* CONFIG_CPU_FREQ */ @@ -1081,7 +1078,7 @@ static void __init check_system_tsc_reliable(void) */ int unsynchronized_tsc(void) { - if (!cpu_has_tsc || tsc_unstable) + if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable) return 1; #ifdef CONFIG_SMP @@ -1205,7 +1202,7 @@ out: static int __init init_tsc_clocksource(void) { - if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) + if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) return 0; if (tsc_clocksource_reliable) @@ -1242,7 +1239,7 @@ void __init tsc_init(void) u64 lpj; int cpu; - if (!cpu_has_tsc) { + if (!boot_cpu_has(X86_FEATURE_TSC)) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index bf4db6eaec8f..6c1ff31d99ff 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -516,7 +516,7 @@ struct uprobe_xol_ops { static inline int sizeof_long(void) { - return is_ia32_task() ? 4 : 8; + return in_ia32_syscall() ? 4 : 8; } static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) @@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) riprel_post_xol(auprobe, regs); } -static struct uprobe_xol_ops default_xol_ops = { +static const struct uprobe_xol_ops default_xol_ops = { .pre_xol = default_pre_xol_op, .post_xol = default_post_xol_op, .abort = default_abort_op, @@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn) 0, insn->immediate.nbytes); } -static struct uprobe_xol_ops branch_xol_ops = { +static const struct uprobe_xol_ops branch_xol_ops = { .emulate = branch_emulate_op, .post_xol = branch_post_xol_op, }; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4c941f88d405..9297a002d8e5 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -334,7 +334,7 @@ SECTIONS __brk_limit = .; } - . = ALIGN(PAGE_SIZE); + . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ _end = .; STABS_DEBUG diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bbbaa802d13e..769af907f824 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -75,7 +75,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) return 0; /* Update OSXSAVE bit */ - if (cpu_has_xsave && best->function == 0x1) { + if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) { best->ecx &= ~F(OSXSAVE); if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) best->ecx |= F(OSXSAVE); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0f6294376fbd..a2f24af3c999 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5110,13 +5110,17 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) { + register void *__sp asm(_ASM_SP); ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; + if (!(ctxt->d & ByteOp)) fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; + asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop) + [fastop]"+S"(fop), "+r"(__sp) : "c"(ctxt->src2.val)); + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); if (!fop) /* exception is returned in fop variable */ return emulate_de(ctxt); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b6f50e8b0a39..38c0c32926c9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3844,7 +3844,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true, true); + boot_cpu_has(X86_FEATURE_GBPAGES), + true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 31346a3f20a5..fafd720ce10a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1254,7 +1254,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) kvm_load_ldt(svm->host.ldt); #ifdef CONFIG_X86_64 loadsegment(fs, svm->host.fs); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); load_gs_index(svm->host.gs); #else #ifdef CONFIG_X86_32_LAZY_GS diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 2f1ea2f61e1f..b72743c5668d 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -809,8 +809,7 @@ TRACE_EVENT(kvm_write_tsc_offset, #define host_clocks \ {VCLOCK_NONE, "none"}, \ - {VCLOCK_TSC, "tsc"}, \ - {VCLOCK_HPET, "hpet"} \ + {VCLOCK_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 133679d520af..cb47fe3da292 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3390,7 +3390,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) } } - if (cpu_has_xsaves) + if (boot_cpu_has(X86_FEATURE_XSAVES)) rdmsrl(MSR_IA32_XSS, host_xss); return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b7798c7b210..12f33e662382 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2611,7 +2611,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_MAX_MCE_BANKS; break; case KVM_CAP_XCRS: - r = cpu_has_xsave; + r = boot_cpu_has(X86_FEATURE_XSAVE); break; case KVM_CAP_TSC_CONTROL: r = kvm_has_tsc_control; @@ -3094,7 +3094,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) /* Set XSTATE_BV and possibly XCOMP_BV. */ xsave->header.xfeatures = xstate_bv; - if (cpu_has_xsaves) + if (boot_cpu_has(X86_FEATURE_XSAVES)) xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* @@ -3121,7 +3121,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - if (cpu_has_xsave) { + if (boot_cpu_has(X86_FEATURE_XSAVE)) { memset(guest_xsave, 0, sizeof(struct kvm_xsave)); fill_xsave((u8 *) guest_xsave->region, vcpu); } else { @@ -3139,7 +3139,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, u64 xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - if (cpu_has_xsave) { + if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* * Here we allow setting states that are not present in * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility @@ -3160,7 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, struct kvm_xcrs *guest_xcrs) { - if (!cpu_has_xsave) { + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { guest_xcrs->nr_xcrs = 0; return; } @@ -3176,7 +3176,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, { int i, r = 0; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -EINVAL; if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) @@ -5865,7 +5865,7 @@ int kvm_arch_init(void *opaque) perf_register_guest_info_callbacks(&kvm_guest_cbs); - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_lapic_init(); @@ -7293,7 +7293,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) static void fx_init(struct kvm_vcpu *vcpu) { fpstate_init(&vcpu->arch.guest_fpu.state); - if (cpu_has_xsaves) + if (boot_cpu_has(X86_FEATURE_XSAVES)) vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index fd57d3ae7e16..3847e736702e 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1408,13 +1408,10 @@ __init void lguest_init(void) { /* We're under lguest. */ pv_info.name = "lguest"; - /* Paravirt is enabled. */ - pv_info.paravirt_enabled = 1; /* We're running at privilege level 1, not 0 as normal. */ pv_info.kernel_rpl = 1; /* Everyone except Xen runs with this set. */ pv_info.shared_kernel_pmd = 1; - pv_info.features = 0; /* * We set up all the lguest overrides for sensitive operations. These diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index be110efa0096..bf2c6074efd2 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -29,8 +29,10 @@ * there is contention on the semaphore. * * %eax contains the semaphore pointer on entry. Save the C-clobbered - * registers (%eax, %edx and %ecx) except %eax whish is either a return - * value or just clobbered.. + * registers (%eax, %edx and %ecx) except %eax which is either a return + * value or just gets clobbered. Same is true for %edx so make sure GCC + * reloads it after the slow path, by making it hold a temporary, for + * example see ____down_write(). */ #define save_common_regs \ @@ -106,6 +108,16 @@ ENTRY(call_rwsem_down_write_failed) ret ENDPROC(call_rwsem_down_write_failed) +ENTRY(call_rwsem_down_write_failed_killable) + FRAME_BEGIN + save_common_regs + movq %rax,%rdi + call rwsem_down_write_failed_killable + restore_common_regs + FRAME_END + ret +ENDPROC(call_rwsem_down_write_failed_killable) + ENTRY(call_rwsem_wake) FRAME_BEGIN /* do nothing if still outstanding active readers */ diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 91d93b95bd86..b559d9238781 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -612,7 +612,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, { stac(); #ifdef CONFIG_X86_INTEL_USERCOPY - if (n > 64 && cpu_has_xmm2) + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_zeroing_intel_nocache(to, from, n); else __copy_user_zeroing(to, from, n); @@ -629,7 +629,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr { stac(); #ifdef CONFIG_X86_INTEL_USERCOPY - if (n > 64 && cpu_has_xmm2) + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); else __copy_user(to, from, n); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index f98913258c63..62c0043a5fd5 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -2,7 +2,7 @@ KCOV_INSTRUMENT_tlb.o := n obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o gup.o setup_nx.o + pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) @@ -12,7 +12,6 @@ CFLAGS_setup_nx.o := $(nostackp) CFLAGS_fault.o := -I$(src)/../include/asm/trace obj-$(CONFIG_X86_PAT) += pat_rbtree.o -obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 82447b3fba38..4bb53b89f3c5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,6 @@ #include <linux/module.h> #include <asm/uaccess.h> +#include <asm/traps.h> typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int); @@ -42,6 +43,43 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_ext); +bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n", + (unsigned int)regs->cx); + + /* Pretend that the read succeeded and returned 0. */ + regs->ip = ex_fixup_addr(fixup); + regs->ax = 0; + regs->dx = 0; + return true; +} +EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); + +bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n", + (unsigned int)regs->cx, + (unsigned int)regs->dx, (unsigned int)regs->ax); + + /* Pretend that the write succeeded. */ + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); + +bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + if (static_cpu_has(X86_BUG_NULL_SEG)) + asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); + asm volatile ("mov %0, %%fs" : : "rm" (0)); + return ex_handler_default(fixup, regs, trapnr); +} +EXPORT_SYMBOL(ex_handler_clear_fs); + bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; @@ -82,24 +120,46 @@ int fixup_exception(struct pt_regs *regs, int trapnr) return handler(e, regs, trapnr); } +extern unsigned int early_recursion_flag; + /* Restricted version used during very early boot */ -int __init early_fixup_exception(unsigned long *ip) +void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { - const struct exception_table_entry *e; - unsigned long new_ip; - ex_handler_t handler; - - e = search_exception_tables(*ip); - if (!e) - return 0; - - new_ip = ex_fixup_addr(e); - handler = ex_fixup_handler(e); - - /* special handling not supported during early boot */ - if (handler != ex_handler_default) - return 0; - - *ip = new_ip; - return 1; + /* Ignore early NMIs. */ + if (trapnr == X86_TRAP_NMI) + return; + + if (early_recursion_flag > 2) + goto halt_loop; + + if (regs->cs != __KERNEL_CS) + goto fail; + + /* + * The full exception fixup machinery is available as soon as + * the early IDT is loaded. This means that it is the + * responsibility of extable users to either function correctly + * when handlers are invoked early or to simply avoid causing + * exceptions before they're ready to handle them. + * + * This is better than filtering which handlers can be used, + * because refusing to call a handler here is guaranteed to + * result in a hard-to-debug panic. + * + * Keep in mind that not all vectors actually get here. Early + * fage faults, for example, are special. + */ + if (fixup_exception(regs, trapnr)) + return; + +fail: + early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", + (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, + regs->orig_ax, read_cr2()); + + show_regs(regs); + +halt_loop: + while (true) + halt(); } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 740d7ac03a55..14a95054d4e0 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -162,7 +162,7 @@ static __init int setup_hugepagesz(char *opt) unsigned long ps = memparse(opt, &opt); if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE && cpu_has_gbpages) { + } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", @@ -177,7 +177,7 @@ __setup("hugepagesz=", setup_hugepagesz); static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */ - if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) + if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; } diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c new file mode 100644 index 000000000000..ec21796ac5fd --- /dev/null +++ b/arch/x86/mm/ident_map.c @@ -0,0 +1,79 @@ +/* + * Helper routines for building identity mapping page tables. This is + * included by both the compressed kernel and the regular kernel. + */ + +static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, + unsigned long addr, unsigned long end) +{ + addr &= PMD_MASK; + for (; addr < end; addr += PMD_SIZE) { + pmd_t *pmd = pmd_page + pmd_index(addr); + + if (!pmd_present(*pmd)) + set_pmd(pmd, __pmd(addr | pmd_flag)); + } +} + +static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + + if (pud_present(*pud)) { + pmd = pmd_offset(pud, 0); + ident_pmd_init(info->pmd_flag, pmd, addr, next); + continue; + } + pmd = (pmd_t *)info->alloc_pgt_page(info->context); + if (!pmd) + return -ENOMEM; + ident_pmd_init(info->pmd_flag, pmd, addr, next); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + + return 0; +} + +int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + int result; + int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; + + for (; addr < end; addr = next) { + pgd_t *pgd = pgd_page + pgd_index(addr) + off; + pud_t *pud; + + next = (addr & PGDIR_MASK) + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, 0); + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + continue; + } + + pud = (pud_t *)info->alloc_pgt_page(info->context); + if (!pud) + return -ENOMEM; + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + + return 0; +} diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 9d56f271d519..372aad2b3291 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -157,23 +157,23 @@ static void __init probe_page_size_mask(void) * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (cpu_has_pse && !debug_pagealloc_enabled()) + if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) page_size_mask |= 1 << PG_LEVEL_2M; #endif /* Enable PSE if available */ - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } else __supported_pte_mask &= ~_PAGE_GLOBAL; /* Enable 1 GB linear kernel mappings if available: */ - if (direct_gbpages && cpu_has_gbpages) { + if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { printk(KERN_INFO "Using GB pages for direct mapping\n"); page_size_mask |= 1 << PG_LEVEL_1G; } else { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bd7a9b9e2e14..84df150ee77e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -284,7 +284,7 @@ kernel_physical_mapping_init(unsigned long start, */ mapping_iter = 1; - if (!cpu_has_pse) + if (!boot_cpu_has(X86_FEATURE_PSE)) use_pse = 0; repeat: @@ -804,9 +804,6 @@ void __init mem_init(void) BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); #undef high_memory #undef __FIXADDR_TOP -#ifdef CONFIG_RANDOMIZE_BASE - BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); -#endif #ifdef CONFIG_HIGHMEM BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 214afda97911..bce2e5d9edd4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -58,79 +58,7 @@ #include "mm_internal.h" -static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, - unsigned long addr, unsigned long end) -{ - addr &= PMD_MASK; - for (; addr < end; addr += PMD_SIZE) { - pmd_t *pmd = pmd_page + pmd_index(addr); - - if (!pmd_present(*pmd)) - set_pmd(pmd, __pmd(addr | pmd_flag)); - } -} -static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, - unsigned long addr, unsigned long end) -{ - unsigned long next; - - for (; addr < end; addr = next) { - pud_t *pud = pud_page + pud_index(addr); - pmd_t *pmd; - - next = (addr & PUD_MASK) + PUD_SIZE; - if (next > end) - next = end; - - if (pud_present(*pud)) { - pmd = pmd_offset(pud, 0); - ident_pmd_init(info->pmd_flag, pmd, addr, next); - continue; - } - pmd = (pmd_t *)info->alloc_pgt_page(info->context); - if (!pmd) - return -ENOMEM; - ident_pmd_init(info->pmd_flag, pmd, addr, next); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - } - - return 0; -} - -int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, - unsigned long addr, unsigned long end) -{ - unsigned long next; - int result; - int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; - - for (; addr < end; addr = next) { - pgd_t *pgd = pgd_page + pgd_index(addr) + off; - pud_t *pud; - - next = (addr & PGDIR_MASK) + PGDIR_SIZE; - if (next > end) - next = end; - - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, 0); - result = ident_pud_init(info, pud, addr, next); - if (result) - return result; - continue; - } - - pud = (pud_t *)info->alloc_pgt_page(info->context); - if (!pud) - return -ENOMEM; - result = ident_pud_init(info, pud, addr, next); - if (result) - return result; - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); - } - - return 0; -} +#include "ident_map.c" /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the @@ -1295,7 +1223,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) struct vmem_altmap *altmap = to_vmem_altmap(start); int err; - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) err = vmemmap_populate_hugepages(start, end, node, altmap); else if (altmap) { pr_err_once("%s: no cpu support for altmap allocations\n", @@ -1338,7 +1266,7 @@ void register_page_bootmem_memmap(unsigned long section_nr, } get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); - if (!cpu_has_pse) { + if (!boot_cpu_has(X86_FEATURE_PSE)) { next = (addr + PAGE_SIZE) & PAGE_MASK; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0d8d53d1f5cc..f0894910bdd7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -378,7 +378,7 @@ EXPORT_SYMBOL(iounmap); int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 - return cpu_has_gbpages; + return boot_cpu_has(X86_FEATURE_GBPAGES); #else return 0; #endif @@ -386,7 +386,7 @@ int __init arch_ioremap_pud_supported(void) int __init arch_ioremap_pmd_supported(void) { - return cpu_has_pse; + return boot_cpu_has(X86_FEATURE_PSE); } /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 01be9ec3bf79..7a1f7bbf4105 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1055,7 +1055,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* * Map everything starting from the Gb boundary, possibly with 1G pages */ - while (cpu_has_gbpages && end - start >= PUD_SIZE) { + while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); @@ -1125,8 +1125,14 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, int primary) { - if (cpa->pgd) + if (cpa->pgd) { + /* + * Right now, we only execute this code path when mapping + * the EFI virtual memory map regions, no other users + * provide a ->pgd value. This may change in the future. + */ return populate_pgd(cpa, vaddr); + } /* * Ignore all non primary paths. @@ -1460,7 +1466,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, * error case we fall back to cpa_flush_all (which uses * WBINVD): */ - if (!ret && cpu_has_clflush) { + if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { cpa_flush_array(addr, numpages, cache, cpa.flags, pages); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index faec01e7a17d..fb0604f11eec 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -40,11 +40,22 @@ static bool boot_cpu_done; static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); +static void init_cache_modes(void); -static inline void pat_disable(const char *reason) +void pat_disable(const char *reason) { + if (!__pat_enabled) + return; + + if (boot_cpu_done) { + WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); + return; + } + __pat_enabled = 0; pr_info("x86/PAT: %s\n", reason); + + init_cache_modes(); } static int __init nopat(char *str) @@ -181,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void pat_init_cache_modes(u64 pat) +static void __init_cache_modes(u64 pat) { enum page_cache_mode cache; char pat_msg[33]; @@ -202,14 +213,11 @@ static void pat_bsp_init(u64 pat) { u64 tmp_pat; - if (!cpu_has_pat) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { pat_disable("PAT not supported by CPU."); return; } - if (!pat_enabled()) - goto done; - rdmsrl(MSR_IA32_CR_PAT, tmp_pat); if (!tmp_pat) { pat_disable("PAT MSR is 0, disabled."); @@ -218,16 +226,12 @@ static void pat_bsp_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); -done: - pat_init_cache_modes(pat); + __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!pat_enabled()) - return; - - if (!cpu_has_pat) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. @@ -238,18 +242,32 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -void pat_init(void) +static void init_cache_modes(void) { - u64 pat; - struct cpuinfo_x86 *c = &boot_cpu_data; + u64 pat = 0; + static int init_cm_done; - if (!pat_enabled()) { + if (init_cm_done) + return; + + if (boot_cpu_has(X86_FEATURE_PAT)) { + /* + * CPU supports PAT. Set PAT table to be consistent with + * PAT MSR. This case supports "nopat" boot option, and + * virtual machine environments which support PAT without + * MTRRs. In specific, Xen has unique setup to PAT MSR. + * + * If PAT MSR returns 0, it is considered invalid and emulates + * as No PAT. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + } + + if (!pat) { /* * No PAT. Emulate the PAT table that corresponds to the two - * cache bits, PWT (Write Through) and PCD (Cache Disable). This - * setup is the same as the BIOS default setup when the system - * has PAT but the "nopat" boot option has been specified. This - * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. + * cache bits, PWT (Write Through) and PCD (Cache Disable). + * This setup is also the same as the BIOS default setup. * * PTE encoding: * @@ -266,10 +284,36 @@ void pat_init(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } + + __init_cache_modes(pat); + + init_cm_done = 1; +} + +/** + * pat_init - Initialize PAT MSR and PAT table + * + * This function initializes PAT MSR and PAT table with an OS-defined value + * to enable additional cache attributes, WC and WT. + * + * This function must be called on all CPUs using the specific sequence of + * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this + * procedure for PAT. + */ +void pat_init(void) +{ + u64 pat; + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (!pat_enabled()) { + init_cache_modes(); + return; + } - } else if ((c->x86_vendor == X86_VENDOR_INTEL) && - (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || - ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { + if ((c->x86_vendor == X86_VENDOR_INTEL) && + (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || + ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { /* * PAT support with the lower four entries. Intel Pentium 2, * 3, M, and 4 are affected by PAT errata, which makes the @@ -734,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (file->f_flags & O_DSYNC) pcm = _PAGE_CACHE_MODE_UC_MINUS; -#ifdef CONFIG_X86_32 - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting UC or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - if (!pat_enabled() && - !(boot_cpu_has(X86_FEATURE_MTRR) || - boot_cpu_has(X86_FEATURE_K6_MTRR) || - boot_cpu_has(X86_FEATURE_CYRIX_ARR) || - boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { - pcm = _PAGE_CACHE_MODE_UC; - } -#endif - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | cachemode2protval(pcm)); return 1; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index fe9b9f776361..5643fd0b1a7d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -28,6 +28,8 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +#ifdef CONFIG_SMP + struct flush_tlb_info { struct mm_struct *flush_mm; unsigned long flush_start; @@ -57,6 +59,118 @@ void leave_mm(int cpu) } EXPORT_SYMBOL_GPL(leave_mm); +#endif /* CONFIG_SMP */ + +void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} + +void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { +#ifdef CONFIG_SMP + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); + this_cpu_write(cpu_tlbstate.active_mm, next); +#endif + cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + * + */ + load_cr3(next->pgd); + + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + + /* Stop flush ipis for the previous mm */ + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + + /* Load per-mm CR4 state */ + load_mm_cr4(next); + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + /* + * Load the LDT, if the LDT is different. + * + * It's possible that prev->context.ldt doesn't match + * the LDT register. This can happen if leave_mm(prev) + * was called and then modify_ldt changed + * prev->context.ldt but suppressed an IPI to this CPU. + * In this case, prev->context.ldt != NULL, because we + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_mm_ldt(next); +#endif + } +#ifdef CONFIG_SMP + else { + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); + + if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { + /* + * On established mms, the mm_cpumask is only changed + * from irq context, from ptep_clear_flush() while in + * lazy tlb mode, and here. Irqs are blocked during + * schedule, protecting us from simultaneous changes. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* + * We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. + * + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. + */ + load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + load_mm_cr4(next); + load_mm_ldt(next); + } + } +#endif +} + +#ifdef CONFIG_SMP + /* * The flush IPI assumes that a thread switch happens in this order: * [cpu0: the cpu that switches] @@ -353,3 +467,5 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); + +#endif /* CONFIG_SMP */ diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 0e07e0968c3a..28c04123b6dd 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -636,7 +636,7 @@ static int __init ppro_init(char **cpu_type) __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) + if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) return 0; /* @@ -700,7 +700,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) char *cpu_type = NULL; int ret = 0; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; if (force_cpu_type == timer) @@ -761,7 +761,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (cpu_type) break; - if (!cpu_has_arch_perfmon) + if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) return -ENODEV; /* use arch perfmon as fallback */ diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index d90528ea5412..350f7096baac 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -75,7 +75,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; - if (cpu_has_arch_perfmon) { + if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index beac4dfdade6..4bd08b0fc8ea 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -445,7 +445,7 @@ void __init xen_msi_init(void) uint32_t eax = cpuid_eax(xen_cpuid_base() + 4); if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) || - ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic)) + ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC))) return; } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 994a7df84a7b..f93545e7dc54 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -54,10 +54,6 @@ #include <asm/rtc.h> #include <asm/uv/uv.h> -#define EFI_DEBUG - -struct efi_memory_map memmap; - static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; @@ -119,11 +115,10 @@ void efi_get_time(struct timespec *now) void __init efi_find_mirror(void) { - void *p; + efi_memory_desc_t *md; u64 mirror_size = 0, total_size = 0; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; @@ -146,10 +141,9 @@ void __init efi_find_mirror(void) static void __init do_add_efi_memmap(void) { - void *p; + efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; int e820_type; @@ -209,47 +203,47 @@ int __init efi_memblock_x86_reserve_range(void) #else pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); #endif - memmap.phys_map = pmap; - memmap.nr_map = e->efi_memmap_size / + efi.memmap.phys_map = pmap; + efi.memmap.nr_map = e->efi_memmap_size / e->efi_memdesc_size; - memmap.desc_size = e->efi_memdesc_size; - memmap.desc_version = e->efi_memdesc_version; + efi.memmap.desc_size = e->efi_memdesc_size; + efi.memmap.desc_version = e->efi_memdesc_version; - memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); + WARN(efi.memmap.desc_version != 1, + "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", + efi.memmap.desc_version); - efi.memmap = &memmap; + memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size); return 0; } void __init efi_print_memmap(void) { -#ifdef EFI_DEBUG efi_memory_desc_t *md; - void *p; - int i; + int i = 0; - for (p = memmap.map, i = 0; - p < memmap.map_end; - p += memmap.desc_size, i++) { + for_each_efi_memory_desc(md) { char buf[64]; - md = p; pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", - i, efi_md_typeattr_format(buf, sizeof(buf), md), + i++, efi_md_typeattr_format(buf, sizeof(buf), md), md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } -#endif /* EFI_DEBUG */ } void __init efi_unmap_memmap(void) { + unsigned long size; + clear_bit(EFI_MEMMAP, &efi.flags); - if (memmap.map) { - early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; + + size = efi.memmap.nr_map * efi.memmap.desc_size; + if (efi.memmap.map) { + early_memunmap(efi.memmap.map, size); + efi.memmap.map = NULL; } } @@ -352,8 +346,6 @@ static int __init efi_systab_init(void *phys) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff); - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - return 0; } @@ -440,17 +432,22 @@ static int __init efi_runtime_init(void) static int __init efi_memmap_init(void) { + unsigned long addr, size; + if (efi_enabled(EFI_PARAVIRT)) return 0; /* Map the EFI memory map */ - memmap.map = early_memremap((unsigned long)memmap.phys_map, - memmap.nr_map * memmap.desc_size); - if (memmap.map == NULL) { + size = efi.memmap.nr_map * efi.memmap.desc_size; + addr = (unsigned long)efi.memmap.phys_map; + + efi.memmap.map = early_memremap(addr, size); + if (efi.memmap.map == NULL) { pr_err("Could not map the memory map!\n"); return -ENOMEM; } - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + + efi.memmap.map_end = efi.memmap.map + size; if (add_efi_memmap) do_add_efi_memmap(); @@ -552,12 +549,9 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable) void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; - void *p; /* Make EFI runtime service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - + for_each_efi_memory_desc(md) { if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; @@ -604,12 +598,10 @@ void __init old_map_region(efi_memory_desc_t *md) /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { - void *p; efi_memory_desc_t *md, *prev_md = NULL; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + for_each_efi_memory_desc(md) { u64 prev_size; - md = p; if (!prev_md) { prev_md = md; @@ -651,30 +643,31 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) static void __init save_runtime_map(void) { #ifdef CONFIG_KEXEC_CORE + unsigned long desc_size; efi_memory_desc_t *md; - void *tmp, *p, *q = NULL; + void *tmp, *q = NULL; int count = 0; if (efi_enabled(EFI_OLD_MEMMAP)) return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + desc_size = efi.memmap.desc_size; + for_each_efi_memory_desc(md) { if (!(md->attribute & EFI_MEMORY_RUNTIME) || (md->type == EFI_BOOT_SERVICES_CODE) || (md->type == EFI_BOOT_SERVICES_DATA)) continue; - tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); + tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL); if (!tmp) goto out; q = tmp; - memcpy(q + count * memmap.desc_size, md, memmap.desc_size); + memcpy(q + count * desc_size, md, desc_size); count++; } - efi_runtime_map_setup(q, count, memmap.desc_size); + efi_runtime_map_setup(q, count, desc_size); return; out: @@ -714,10 +707,10 @@ static inline void *efi_map_next_entry_reverse(void *entry) { /* Initial call */ if (!entry) - return memmap.map_end - memmap.desc_size; + return efi.memmap.map_end - efi.memmap.desc_size; - entry -= memmap.desc_size; - if (entry < memmap.map) + entry -= efi.memmap.desc_size; + if (entry < efi.memmap.map) return NULL; return entry; @@ -759,10 +752,10 @@ static void *efi_map_next_entry(void *entry) /* Initial call */ if (!entry) - return memmap.map; + return efi.memmap.map; - entry += memmap.desc_size; - if (entry >= memmap.map_end) + entry += efi.memmap.desc_size; + if (entry >= efi.memmap.map_end) return NULL; return entry; @@ -776,8 +769,11 @@ static void * __init efi_map_regions(int *count, int *pg_shift) { void *p, *new_memmap = NULL; unsigned long left = 0; + unsigned long desc_size; efi_memory_desc_t *md; + desc_size = efi.memmap.desc_size; + p = NULL; while ((p = efi_map_next_entry(p))) { md = p; @@ -792,7 +788,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift) efi_map_region(md); get_systab_virt_addr(md); - if (left < memmap.desc_size) { + if (left < desc_size) { new_memmap = realloc_pages(new_memmap, *pg_shift); if (!new_memmap) return NULL; @@ -801,10 +797,9 @@ static void * __init efi_map_regions(int *count, int *pg_shift) (*pg_shift)++; } - memcpy(new_memmap + (*count * memmap.desc_size), md, - memmap.desc_size); + memcpy(new_memmap + (*count * desc_size), md, desc_size); - left -= memmap.desc_size; + left -= desc_size; (*count)++; } @@ -816,7 +811,6 @@ static void __init kexec_enter_virtual_mode(void) #ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; unsigned int num_pages; - void *p; efi.systab = NULL; @@ -840,8 +834,7 @@ static void __init kexec_enter_virtual_mode(void) * Map efi regions which were passed via setup_data. The virt_addr is a * fixed addr which was used in first kernel of a kexec boot. */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + for_each_efi_memory_desc(md) { efi_map_region_fixed(md); /* FIXME: add error handling */ get_systab_virt_addr(md); } @@ -850,10 +843,10 @@ static void __init kexec_enter_virtual_mode(void) BUG_ON(!efi.systab); - num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE); + num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE); num_pages >>= PAGE_SHIFT; - if (efi_setup_page_tables(memmap.phys_map, num_pages)) { + if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) { clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; } @@ -937,16 +930,16 @@ static void __init __efi_enter_virtual_mode(void) if (efi_is_native()) { status = phys_efi_set_virtual_address_map( - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, + efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, (efi_memory_desc_t *)__pa(new_memmap)); } else { status = efi_thunk_set_virtual_address_map( efi_phys.set_virtual_address_map, - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, + efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, (efi_memory_desc_t *)__pa(new_memmap)); } @@ -1011,13 +1004,11 @@ void __init efi_enter_virtual_mode(void) u32 efi_mem_type(unsigned long phys_addr) { efi_memory_desc_t *md; - void *p; if (!efi_enabled(EFI_MEMMAP)) return 0; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + for_each_efi_memory_desc(md) { if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)))) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 49e4dd4a1f58..6e7242be1c87 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -55,14 +55,12 @@ struct efi_scratch efi_scratch; static void __init early_code_mapping_set_exec(int executable) { efi_memory_desc_t *md; - void *p; if (!(__supported_pte_mask & _PAGE_NX)) return; /* Make EFI service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + for_each_efi_memory_desc(md) { if (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_BOOT_SERVICES_CODE) efi_set_executable(md, executable); @@ -253,7 +251,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ - for_each_efi_memory_desc(&memmap, md) { + for_each_efi_memory_desc(md) { if (md->type != EFI_CONVENTIONAL_MEMORY && md->type != EFI_LOADER_DATA && md->type != EFI_LOADER_CODE) @@ -398,7 +396,6 @@ void __init efi_runtime_update_mappings(void) unsigned long pfn; pgd_t *pgd = efi_pgd; efi_memory_desc_t *md; - void *p; if (efi_enabled(EFI_OLD_MEMMAP)) { if (__supported_pte_mask & _PAGE_NX) @@ -409,9 +406,8 @@ void __init efi_runtime_update_mappings(void) if (!efi_enabled(EFI_NX_PE_DATA)) return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + for_each_efi_memory_desc(md) { unsigned long pf = 0; - md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index ab50ada1d56e..097cb09d917b 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -195,10 +195,9 @@ static bool can_free_region(u64 start, u64 size) */ void __init efi_reserve_boot_services(void) { - void *p; + efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; bool already_reserved; @@ -250,10 +249,9 @@ void __init efi_reserve_boot_services(void) void __init efi_free_boot_services(void) { - void *p; + efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 1584cbed0dce..815fec6e05e2 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -21,19 +21,20 @@ #include <linux/efi.h> #include <linux/export.h> +#include <linux/slab.h> #include <asm/efi.h> #include <linux/io.h> #include <asm/uv/bios.h> #include <asm/uv/uv_hub.h> -static struct uv_systab uv_systab; +struct uv_systab *uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { - struct uv_systab *tab = &uv_systab; + struct uv_systab *tab = uv_systab; s64 ret; - if (!tab->function) + if (!tab || !tab->function) /* * BIOS does not support UV systab */ @@ -183,34 +184,31 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) } EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); - #ifdef CONFIG_EFI void uv_bios_init(void) { - struct uv_systab *tab; - - if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || - (efi.uv_systab == (unsigned long)NULL)) { - printk(KERN_CRIT "No EFI UV System Table.\n"); - uv_systab.function = (unsigned long)NULL; + uv_systab = NULL; + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) { + pr_crit("UV: UVsystab: missing\n"); return; } - tab = (struct uv_systab *)ioremap(efi.uv_systab, - sizeof(struct uv_systab)); - if (strncmp(tab->signature, "UVST", 4) != 0) - printk(KERN_ERR "bad signature in UV system table!"); - - /* - * Copy table to permanent spot for later use. - */ - memcpy(&uv_systab, tab, sizeof(struct uv_systab)); - iounmap(tab); + uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab)); + if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) { + pr_err("UV: UVsystab: bad signature!\n"); + iounmap(uv_systab); + return; + } - printk(KERN_INFO "EFI UV System Table Revision %d\n", - uv_systab.revision); + if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) { + iounmap(uv_systab); + uv_systab = ioremap(efi.uv_systab, uv_systab->size); + if (!uv_systab) { + pr_err("UV: UVsystab: ioremap(%d) failed!\n", + uv_systab->size); + return; + } + } + pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); } -#else /* !CONFIG_EFI */ - -void uv_bios_init(void) { } #endif diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3b6ec42718e4..fdb4d42b4ce5 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -37,7 +37,7 @@ static int timeout_base_ns[] = { }; static int timeout_us; -static int nobau; +static bool nobau = true; static int nobau_perm; static cycles_t congested_cycles; @@ -106,13 +106,28 @@ static char *stat_description[] = { "enable: number times use of the BAU was re-enabled" }; -static int __init -setup_nobau(char *arg) +static int __init setup_bau(char *arg) { - nobau = 1; + int result; + + if (!arg) + return -EINVAL; + + result = strtobool(arg, &nobau); + if (result) + return result; + + /* we need to flip the logic here, so that bau=y sets nobau to false */ + nobau = !nobau; + + if (!nobau) + pr_info("UV BAU Enabled\n"); + else + pr_info("UV BAU Disabled\n"); + return 0; } -early_param("nobau", setup_nobau); +early_param("bau", setup_bau); /* base pnode in this partition */ static int uv_base_pnode __read_mostly; @@ -131,10 +146,10 @@ set_bau_on(void) pr_info("BAU not initialized; cannot be turned on\n"); return; } - nobau = 0; + nobau = false; for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); - bcp->nobau = 0; + bcp->nobau = false; } pr_info("BAU turned on\n"); return; @@ -146,10 +161,10 @@ set_bau_off(void) int cpu; struct bau_control *bcp; - nobau = 1; + nobau = true; for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); - bcp->nobau = 1; + bcp->nobau = true; } pr_info("BAU turned off\n"); return; @@ -1886,7 +1901,7 @@ static void __init init_per_cpu_tunables(void) bcp = &per_cpu(bau_control, cpu); bcp->baudisabled = 0; if (nobau) - bcp->nobau = 1; + bcp->nobau = true; bcp->statp = &per_cpu(ptcstats, cpu); /* time interval to catch a hardware stay-busy bug */ bcp->timeout_interval = usec_2_cycles(2*timeout_us); @@ -2025,7 +2040,8 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, return 1; } bcp->uvhub_master = *hmasterp; - bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; + bcp->uvhub_cpu = uv_cpu_blade_processor_id(cpu); + if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { printk(KERN_EMERG "%d cpus per uvhub invalid\n", bcp->uvhub_cpu); diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c index 5d4ba301e776..e9da9ebd924a 100644 --- a/arch/x86/platform/uv/uv_sysfs.c +++ b/arch/x86/platform/uv/uv_sysfs.c @@ -34,7 +34,7 @@ static ssize_t partition_id_show(struct kobject *kobj, static ssize_t coherence_id_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); + return snprintf(buf, PAGE_SIZE, "%ld\n", uv_partition_coherence_id()); } static struct kobj_attribute partition_id_attr = diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 2b158a9fa1d7..b333fc45f9ec 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -165,7 +165,7 @@ static __init int uv_rtc_allocate_timers(void) for_each_present_cpu(cpu) { int nid = cpu_to_node(cpu); int bid = uv_cpu_to_blade_id(cpu); - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + int bcpu = uv_cpu_blade_processor_id(cpu); struct uv_rtc_timer_head *head = blade_info[bid]; if (!head) { @@ -226,7 +226,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) int pnode = uv_cpu_to_pnode(cpu); int bid = uv_cpu_to_blade_id(cpu); struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + int bcpu = uv_cpu_blade_processor_id(cpu); u64 *t = &head->cpu[bcpu].expires; unsigned long flags; int next_cpu; @@ -262,7 +262,7 @@ static int uv_rtc_unset_timer(int cpu, int force) int pnode = uv_cpu_to_pnode(cpu); int bid = uv_cpu_to_blade_id(cpu); struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + int bcpu = uv_cpu_blade_processor_id(cpu); u64 *t = &head->cpu[bcpu].expires; unsigned long flags; int rc = 0; diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 291226b952a9..9f14bd34581d 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -106,7 +106,7 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) * normal page tables. * NOTE: We can mark everything as executable here */ - if (cpu_has_pse) { + if (boot_cpu_has(X86_FEATURE_PSE)) { set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); pfn += PTRS_PER_PTE; } else { diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig index df280da34825..d957d5f21a86 100644 --- a/arch/x86/ras/Kconfig +++ b/arch/x86/ras/Kconfig @@ -1,4 +1,4 @@ -config AMD_MCE_INJ +config MCE_AMD_INJ tristate "Simple MCE injection interface for AMD processors" depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB default n diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile index dd2c98b84037..5f94546db280 100644 --- a/arch/x86/ras/Makefile +++ b/arch/x86/ras/Makefile @@ -1,2 +1,2 @@ -obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o +obj-$(CONFIG_MCE_AMD_INJ) += mce_amd_inj.o diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 9e02dcaef683..e69f4701a076 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -290,14 +290,33 @@ static void do_inject(void) wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, (u32)mcg_status, (u32)(mcg_status >> 32)); - wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); + if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (inj_type == DFR_INT_INJ) { + wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b), + (u32)i_mce.status, (u32)(i_mce.status >> 32)); + + wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b), + (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); + } else { + wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b), + (u32)i_mce.status, (u32)(i_mce.status >> 32)); + + wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b), + (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); + } + + wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b), + (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); + } else { + wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), + (u32)i_mce.status, (u32)(i_mce.status >> 32)); - wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); + wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), + (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); + wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), + (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); + } toggle_hw_mce_inject(cpu, false); diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh deleted file mode 100644 index 1a4c17bb3910..000000000000 --- a/arch/x86/tools/calc_run_size.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh -# -# Calculate the amount of space needed to run the kernel, including room for -# the .bss and .brk sections. -# -# Usage: -# objdump -h a.out | sh calc_run_size.sh - -NUM='\([0-9a-fA-F]*[ \t]*\)' -OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p') -if [ -z "$OUT" ] ; then - echo "Never found .bss or .brk file offset" >&2 - exit 1 -fi - -OUT=$(echo ${OUT# }) -sizeA=$(printf "%d" 0x${OUT%% *}) -OUT=${OUT#* } -offsetA=$(printf "%d" 0x${OUT%% *}) -OUT=${OUT#* } -sizeB=$(printf "%d" 0x${OUT%% *}) -OUT=${OUT#* } -offsetB=$(printf "%d" 0x${OUT%% *}) - -run_size=$(( $offsetA + $sizeA + $sizeB )) - -# BFD linker shows the same file offset in ELF. -if [ "$offsetA" -ne "$offsetB" ] ; then - # Gold linker shows them as consecutive. - endB=$(( $offsetB + $sizeB )) - if [ "$endB" != "$run_size" ] ; then - printf "sizeA: 0x%x\n" $sizeA >&2 - printf "offsetA: 0x%x\n" $offsetA >&2 - printf "sizeB: 0x%x\n" $sizeB >&2 - printf "offsetB: 0x%x\n" $offsetB >&2 - echo ".bss and .brk are non-contiguous" >&2 - exit 1 - fi -fi - -printf "%d\n" $run_size -exit 0 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 880862c7d9dd..760789ae8562 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,7 +75,6 @@ #include <asm/mach_traps.h> #include <asm/mwait.h> #include <asm/pci_x86.h> -#include <asm/pat.h> #include <asm/cpu.h> #ifdef CONFIG_ACPI @@ -1093,6 +1092,26 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } +static u64 xen_read_msr(unsigned int msr) +{ + /* + * This will silently swallow a #GP from RDMSR. It may be worth + * changing that. + */ + int err; + + return xen_read_msr_safe(msr, &err); +} + +static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) +{ + /* + * This will silently swallow a #GP from WRMSR. It may be worth + * changing that. + */ + xen_write_msr_safe(msr, low, high); +} + void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1187,13 +1206,11 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, } static const struct pv_info xen_info __initconst = { - .paravirt_enabled = 1, .shared_kernel_pmd = 0, #ifdef CONFIG_X86_64 .extra_user_64bit_cs = FLAT_USER_CS64, #endif - .features = 0, .name = "Xen", }; @@ -1223,8 +1240,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .wbinvd = native_wbinvd, - .read_msr = xen_read_msr_safe, - .write_msr = xen_write_msr_safe, + .read_msr = xen_read_msr, + .write_msr = xen_write_msr, + + .read_msr_safe = xen_read_msr_safe, + .write_msr_safe = xen_write_msr_safe, .read_pmc = xen_read_pmc, @@ -1469,10 +1489,10 @@ static void xen_pvh_set_cr_flags(int cpu) * For BSP, PSE PGE are set in probe_page_size_mask(), for APs * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). */ - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) cr4_set_bits_and_update_boot(X86_CR4_PSE); - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) cr4_set_bits_and_update_boot(X86_CR4_PGE); } @@ -1506,12 +1526,16 @@ static void __init xen_pvh_early_guest_init(void) } #endif /* CONFIG_XEN_PVH */ +static void __init xen_dom0_set_legacy_features(void) +{ + x86_platform.legacy.rtc = 1; +} + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; - u64 pat; int rc; if (!xen_start_info) @@ -1527,8 +1551,6 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; - if (xen_initial_domain()) - pv_info.features |= PV_SUPPORTED_RTC; pv_init_ops = xen_init_ops; if (!xen_pvh_domain()) { pv_cpu_ops = xen_cpu_ops; @@ -1618,13 +1640,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_start_info->nr_pages); xen_reserve_special_pages(); - /* - * Modify the cache mode translation tables to match Xen's PAT - * configuration. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - pat_init_cache_modes(pat); - /* keep using Xen gdt for now; no urgent need to change it */ #ifdef CONFIG_X86_32 @@ -1670,6 +1685,7 @@ asmlinkage __visible void __init xen_start_kernel(void) boot_params.hdr.ramdisk_image = initrd_start; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); + boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN; if (!xen_initial_domain()) { add_preferred_console("xenboot", 0, NULL); @@ -1687,6 +1703,8 @@ asmlinkage __visible void __init xen_start_kernel(void) .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, }; + x86_platform.set_legacy_features = + xen_dom0_set_legacy_features; xen_init_vga(info, xen_start_info->console.dom0.info_size); xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index b56855a1382a..28cf4c5d65ef 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += mm-arch-hooks.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h +generic-y += rwsem.h generic-y += sections.h generic-y += siginfo.h generic-y += statfs.h diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h deleted file mode 100644 index 249619e7e7f2..000000000000 --- a/arch/xtensa/include/asm/rwsem.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * include/asm-xtensa/rwsem.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Largely copied from include/asm-ppc/rwsem.h - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _XTENSA_RWSEM_H -#define _XTENSA_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." -#endif - -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - if (atomic_add_return(1,(atomic_t *)(&sem->count)) > 0) - smp_wmb(); - else - rwsem_down_read_failed(sem); -} - -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - int tmp; - - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { - smp_wmb(); - return 1; - } - } - return 0; -} - -/* - * lock for writing - */ -static inline void __down_write(struct rw_semaphore *sem) -{ - int tmp; - - tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_t *)(&sem->count)); - if (tmp == RWSEM_ACTIVE_WRITE_BIAS) - smp_wmb(); - else - rwsem_down_write_failed(sem); -} - -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - int tmp; - - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - smp_wmb(); - return tmp == RWSEM_UNLOCKED_VALUE; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - int tmp; - - smp_wmb(); - tmp = atomic_sub_return(1,(atomic_t *)(&sem->count)); - if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - smp_wmb(); - if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_t *)(&sem->count)) < 0) - rwsem_wake(sem); -} - -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) -{ - atomic_add(delta, (atomic_t *)(&sem->count)); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - int tmp; - - smp_wmb(); - tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count)); - if (tmp < 0) - rwsem_downgrade_wake(sem); -} - -/* - * implement exchange and add functionality - */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) -{ - smp_mb(); - return atomic_add_return(delta, (atomic_t *)(&sem->count)); -} - -#endif /* _XTENSA_RWSEM_H */ diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 54f01188c29c..a6b00b3af429 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data) void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH, + xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack, callchain_trace, NULL, entry); } void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH, + xtensa_backtrace_user(regs, sysctl_perf_event_max_stack, callchain_trace, entry); } |