Diffstat (limited to 'arch/x86/kernel')
43 files changed, 759 insertions, 573 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e8fea7ffa306..06635fbca81c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -32,7 +32,7 @@ #include <linux/dmi.h> #include <linux/irq.h> #include <linux/slab.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/ioport.h> #include <linux/pci.h> #include <linux/efi-bgrt.h> @@ -933,7 +933,8 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) * the resource tree during the lateinit timeframe. */ #define HPET_RESOURCE_NAME_SIZE 9 - hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); + hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE, + SMP_CACHE_BYTES); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; @@ -1775,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) u64 x86_default_get_root_pointer(void) { - return boot_params.hdr.acpi_rsdp_addr; + return boot_params.acpi_rsdp_addr; } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f1915b744052..ca13851f0570 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -7,7 +7,6 @@ */ #include <linux/acpi.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/dmi.h> #include <linux/cpumask.h> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ab731ab09f06..32b2b7a41ef5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -20,7 +20,7 @@ #include <linux/acpi_pmtmr.h> #include <linux/clockchips.h> #include <linux/interrupt.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/ftrace.h> #include <linux/ioport.h> #include <linux/export.h> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ff0d14cd9e82..2953bbf05c08 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -47,7 +47,7 @@ #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ #include <linux/slab.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <asm/irqdomain.h> #include <asm/io.h> @@ -2578,7 +2578,7 @@ static struct resource * __init ioapic_setup_resources(void) n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); n *= nr_ioapics; - mem = alloc_bootmem(n); + mem = memblock_alloc(n, SMP_CACHE_BYTES); res = (void *)mem; mem += sizeof(struct resource) * nr_ioapics; @@ -2621,7 +2621,8 @@ void __init io_apic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE, + PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c37e66e493bf..500278f5308e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/nospec.h> #include <linux/prctl.h> +#include <linux/sched/smt.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -53,6 +54,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in 
switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + void __init check_bugs(void) { identify_boot_cpu(); @@ -123,31 +131,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -169,6 +152,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -202,7 +189,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -217,6 +204,15 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -238,67 +234,217 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif -static void __init spec2_print_if_insecure(const char *reason) +static inline bool match_option(const char *arg, int arglen, const char *opt) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); } -static void __init spec2_print_if_secure(const char *reason) +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, + SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + 
[SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); } -static inline bool retp_compiler(void) +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) { - return __is_defined(RETPOLINE); + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool match_option(const char *arg, int arglen, const char *opt) +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { - int len = strlen(opt); + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; - return len == arglen && !strncmp(arg, opt, len); + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + mode = SPECTRE_V2_USER_PRCTL; + break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: + static_branch_enable(&switch_mm_always_ibpb); + break; + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + static_branch_enable(&switch_mm_cond_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + static_key_enabled(&switch_mm_always_ibpb) ? 
+ "always-on" : "conditional"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. + */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); } +static const char * const spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, +} mitigation_options[] __initdata = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, }; +static void __init spec_v2_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("%s selected on command line.\n", reason); +} + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). 
Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || @@ -316,54 +462,11 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) - mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -417,14 +520,12 @@ retpoline_auto: pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : - SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + mode = SPECTRE_V2_RETPOLINE_AMD; setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : - SPECTRE_V2_RETPOLINE_MINIMAL; + mode = SPECTRE_V2_RETPOLINE_GENERIC; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } @@ -443,12 +544,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -465,10 +560,67 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } +static void update_stibp_msr(void * __unused) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) +{ + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? 
"always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); +} + +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + +void arch_smt_update(void) +{ + /* Enhanced IBRS implies STIBP. No update required. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + + mutex_lock(&spec_ctrl_mutex); + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + update_indir_branch_cond(); + break; + } + + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt @@ -483,7 +635,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -493,7 +645,7 @@ static const char *ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ @@ -604,10 +756,25 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -618,28 +785,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task); break; default: return -ERANGE; } + return 0; +} - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. 
- */ - if (task == current && update) - speculative_store_bypass_update_current(); - +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } return 0; } @@ -649,6 +844,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -659,6 +856,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -681,11 +880,35 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -823,7 +1046,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", @@ -839,13 +1062,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? 
"vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) @@ -854,11 +1078,39 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; + } + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + if (static_key_enabled(&switch_mm_always_ibpb)) + return ", IBPB: always-on"; + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; + } + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -876,13 +1128,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 660d0b22e962..ffb181f959d2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,7 +1,7 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/kernel.h> @@ -1074,7 +1074,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + memset(&c->x86_capability, 0, sizeof(c->x86_capability)); c->extended_cpuid_level = 0; if (!have_cpuid_p()) @@ -1317,7 +1317,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + memset(&c->x86_capability, 0, sizeof(c->x86_capability)); generic_identify(c); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8cb3c02980cf..36d2696c9563 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs) * be somewhat complicated (e.g. segment offset would require an instruction * parser). So only support physical addresses up to page granuality for now. 
*/ -static int mce_usable_address(struct mce *m) +int mce_usable_address(struct mce *m) { if (!(m->status & MCI_STATUS_ADDRV)) return 0; @@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m) return 1; } +EXPORT_SYMBOL_GPL(mce_usable_address); bool mce_is_memory_error(struct mce *m) { @@ -534,7 +535,7 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); -static bool mce_is_correctable(struct mce *m) +bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) return false; @@ -547,6 +548,7 @@ static bool mce_is_correctable(struct mce *m) return true; } +EXPORT_SYMBOL_GPL(mce_is_correctable); static bool cec_add_mce(struct mce *m) { @@ -2215,7 +2217,7 @@ static int mce_device_create(unsigned int cpu) if (dev) return 0; - dev = kzalloc(sizeof *dev, GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->id = cpu; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index dd33c357548f..e12454e21b8a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -56,7 +56,7 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -static bool thresholding_en; +static bool thresholding_irq_en; static const char * const th_names[] = { "load_store", @@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, set_offset: offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; + if (offset == new) + thresholding_irq_en = true; done: mce_threshold_block_init(&b, offset); @@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - if (!thresholding_en) - return 0; - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu) struct threshold_bank **bp; int err = 0; - if (!thresholding_en) - return 0; - bp = per_cpu(threshold_banks, cpu); if (bp) return 0; @@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void) { unsigned lcpu = 0; - if (mce_threshold_vector == amd_threshold_interrupt) - thresholding_en = true; - /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = mce_threshold_create_device(lcpu); @@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void) return err; } + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; } /* diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b9bc8a1a584e..2637ff09d6a0 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -666,8 +666,8 @@ static ssize_t pf_show(struct device *dev, } static DEVICE_ATTR_WO(reload); -static DEVICE_ATTR(version, 0400, version_show, NULL); -static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); +static DEVICE_ATTR(version, 0444, version_show, NULL); +static DEVICE_ATTR(processor_flags, 0444, pf_show, NULL); static struct attribute *mc_default_attrs[] = { &dev_attr_version.attr, diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 1c72f3819eb1..e81a2db42df7 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -20,6 +20,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include 
<linux/kexec.h> +#include <linux/i8253.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; + /* + * Hyper-V VMs have a PIT emulation quirk such that zeroing the + * counter register during PIT shutdown restarts the PIT. So it + * continues to interrupt @18.2 HZ. Setting i8253_clear_counter + * to false tells pit_shutdown() not to zero the counter so that + * the PIT really is shutdown. Generation 2 VMs don't have a PIT, + * and setting this value has no effect. + */ + i8253_clear_counter_on_shutdown = false; + #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e12ee86906c6..86e277f8daf4 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -798,7 +798,7 @@ static void generic_set_all(void) local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ - for (count = 0; count < sizeof mask * 8; ++count) { + for (count = 0; count < sizeof(mask) * 8; ++count) { if (mask & 0x01) set_bit(count, &smp_changes_mask); mask >>= 1; diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 40eee6cc4124..2e173d47b450 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -174,12 +174,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) case MTRRIOC_SET_PAGE_ENTRY: case MTRRIOC_DEL_PAGE_ENTRY: case MTRRIOC_KILL_PAGE_ENTRY: - if (copy_from_user(&sentry, arg, sizeof sentry)) + if (copy_from_user(&sentry, arg, sizeof(sentry))) return -EFAULT; break; case MTRRIOC_GET_ENTRY: case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) + if (copy_from_user(&gentry, arg, sizeof(gentry))) return -EFAULT; break; #ifdef CONFIG_COMPAT @@ -332,7 +332,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) switch (cmd) { case MTRRIOC_GET_ENTRY: case MTRRIOC_GET_PAGE_ENTRY: - if (copy_to_user(arg, &gentry, sizeof gentry)) + if (copy_to_user(arg, &gentry, sizeof(gentry))) err = -EFAULT; break; #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d9ab49bed8af..0eda91f8eeac 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s) } early_param("no-vmw-sched-clock", setup_vmw_sched_clock); -static unsigned long long vmware_sched_clock(void) +static unsigned long long notrace vmware_sched_clock(void) { unsigned long long ns; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index f39f3a06c26f..7299dcbf8e85 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -140,7 +140,7 @@ static void __init dtb_cpu_setup(void) int ret; version = GET_APIC_VERSION(apic_read(APIC_LVR)); - for_each_node_by_type(dn, "cpu") { + for_each_of_cpu_node(dn) { ret = of_property_read_u32(dn, "reg", &apic_id); if (ret < 0) { pr_warn("%pOF: missing local APIC ID\n", dn); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c88c23c658c1..50895c2f937d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -9,11 +9,10 @@ * allocation code routines via a platform independent interface (memblock, etc.). 
*/ #include <linux/crash_dump.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/suspend.h> #include <linux/acpi.h> #include <linux/firmware-map.h> -#include <linux/memblock.h> #include <linux/sort.h> #include <asm/e820/api.h> @@ -1094,7 +1093,8 @@ void __init e820__reserve_resources(void) struct resource *res; u64 end; - res = alloc_bootmem(sizeof(*res) * e820_table->nr_entries); + res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, + SMP_CACHE_BYTES); e820_res = res; for (i = 0; i < e820_table->nr_entries; i++) { @@ -1248,7 +1248,6 @@ void __init e820__memblock_setup(void) { int i; u64 end; - u64 addr = 0; /* * The bootstrap memblock region count maximum is 128 entries @@ -1265,21 +1264,13 @@ void __init e820__memblock_setup(void) struct e820_entry *entry = &e820_table->entries[i]; end = entry->addr + entry->size; - if (addr < entry->addr) - memblock_reserve(addr, entry->addr - addr); - addr = end; if (end != (resource_size_t)end) continue; - /* - * all !E820_TYPE_RAM ranges (including gap ranges) are put - * into memblock.reserved to make sure that struct pages in - * such regions are not left uninitialized after bootup. - */ if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) - memblock_reserve(entry->addr, entry->size); - else - memblock_add(entry->addr, entry->size); + continue; + + memblock_add(entry->addr, entry->size); } /* Throw away partial pages: */ diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 5e801c8c8ce7..374a52fa5296 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -213,8 +213,9 @@ static unsigned int mem32_serial_in(unsigned long addr, int offset) * early_pci_serial_init() * * This function is invoked when the early_printk param starts with "pciserial" - * The rest of the param should be ",B:D.F,baud" where B, D & F describe the - * location of a PCI device that must be a UART device. + * The rest of the param should be "[force],B:D.F,baud", where B, D & F describe + * the location of a PCI device that must be a UART device. "force" is optional + * and overrides the use of an UART device with a wrong PCI class code. 
*/ static __init void early_pci_serial_init(char *s) { @@ -224,17 +225,23 @@ static __init void early_pci_serial_init(char *s) u32 classcode, bar0; u16 cmdreg; char *e; + int force = 0; - - /* - * First, part the param to get the BDF values - */ if (*s == ',') ++s; if (*s == 0) return; + /* Force the use of an UART device with wrong class code */ + if (!strncmp(s, "force,", 6)) { + force = 1; + s += 6; + } + + /* + * Part the param to get the BDF values + */ bus = (u8)simple_strtoul(s, &e, 16); s = e; if (*s != ':') @@ -253,7 +260,7 @@ static __init void early_pci_serial_init(char *s) s++; /* - * Second, find the device from the BDF + * Find the device from the BDF */ cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND); classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); @@ -264,8 +271,10 @@ static __init void early_pci_serial_init(char *s) */ if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) && (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) || - (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ - return; + (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ { + if (!force) + return; + } /* * Determine if it is IO or memory mapped @@ -289,7 +298,7 @@ static __init void early_pci_serial_init(char *s) } /* - * Lastly, initialize the hardware + * Initialize the hardware */ if (*s) { if (strcmp(s, "nocfg") == 0) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 61a949d84dfa..d99a8ee9e185 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->initialized = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 01ebcb6f263e..7ee8067cbf45 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, { unsigned long old; int faulted; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, return; } - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = old; - return; - } - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent) == -EBUSY) { - *parent = old; - return; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 76fa3b836598..ec6fefbfd3c0 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void) cr4_init_shadow(); sanitize_boot_params(&boot_params); - x86_verify_bootdata_version(); x86_early_init_platform_quirks(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5dc377dc9d7b..16b1cbd3a61e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -385,7 +385,7 @@ static void __init copy_bootdata(char *real_mode_data) */ sme_map_bootdata(real_mode_data); - memcpy(&boot_params, real_mode_data, sizeof boot_params); + memcpy(&boot_params, real_mode_data, 
sizeof(boot_params)); sanitize_boot_params(&boot_params); cmd_line_ptr = get_cmd_line_ptr(); if (cmd_line_ptr) { @@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data) if (!boot_params.hdr.version) copy_bootdata(__va(real_mode_data)); - x86_verify_bootdata_version(); - x86_early_init_platform_quirks(); switch (boot_params.hdr.hardware_subarch) { diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ab18e0884dc6..6135ae8ce036 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -199,14 +199,6 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm) /* * If PTI is enabled, this maps the LDT into the kernelmode and * usermode tables for the given mm. - * - * There is no corresponding unmap function. Even if the LDT is freed, we - * leave the PTEs around until the slot is reused or the mm is destroyed. - * This is harmless: the LDT is always in ordinary memory, and no one will - * access the freed slot. - * - * If we wanted to unmap freed LDTs, we'd also need to do a flush to make - * it useful, and the flush would slow down modify_ldt(). */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -214,8 +206,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) unsigned long va; bool is_vmalloc; spinlock_t *ptl; - pgd_t *pgd; - int i; + int i, nr_pages; if (!static_cpu_has(X86_FEATURE_PTI)) return 0; @@ -229,16 +220,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) /* Check if the current mappings are sane */ sanity_check_ldt_mapping(mm); - /* - * Did we already have the top level entry allocated? We can't - * use pgd_none() for this because it doens't do anything on - * 4-level page table kernels. - */ - pgd = pgd_offset(mm, LDT_BASE_ADDR); - is_vmalloc = is_vmalloc_addr(ldt->entries); - for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; @@ -272,13 +258,39 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) /* Propagate LDT mapping to the user page-table */ map_ldt_struct_to_user(mm); - va = (unsigned long)ldt_slot_va(slot); - flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false); - ldt->slot = slot; return 0; } +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ + unsigned long va; + int i, nr_pages; + + if (!ldt) + return; + + /* LDT map/unmap is only required for PTI */ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = i << PAGE_SHIFT; + spinlock_t *ptl; + pte_t *ptep; + + va = (unsigned long)ldt_slot_va(ldt->slot) + offset; + ptep = get_locked_pte(mm, va, &ptl); + pte_clear(mm, va, ptep); + pte_unmap_unlock(ptep, ptl); + } + + va = (unsigned long)ldt_slot_va(ldt->slot); + flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); +} + #else /* !CONFIG_PAGE_TABLE_ISOLATION */ static int @@ -286,6 +298,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) { return 0; } + +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ +} #endif /* CONFIG_PAGE_TABLE_ISOLATION */ static void free_ldt_pgtables(struct mm_struct *mm) @@ -524,6 +540,7 @@ static int write_ldt(void __user 
*ptr, unsigned long bytecount, int oldmode) } install_ldt(mm, new_ldt); + unmap_ldt_struct(mm, old_ldt); free_ldt_struct(old_ldt); error = 0; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f1c5eb99d445..3482460d984d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -11,7 +11,6 @@ #include <linux/mm.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index ef688804f80d..4588414e2561 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -115,14 +115,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = -EBADF; break; } - if (copy_from_user(®s, uregs, sizeof regs)) { + if (copy_from_user(®s, uregs, sizeof(regs))) { err = -EFAULT; break; } err = rdmsr_safe_regs_on_cpu(cpu, regs); if (err) break; - if (copy_to_user(uregs, ®s, sizeof regs)) + if (copy_to_user(uregs, ®s, sizeof(regs))) err = -EFAULT; break; @@ -131,14 +131,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = -EBADF; break; } - if (copy_from_user(®s, uregs, sizeof regs)) { + if (copy_from_user(®s, uregs, sizeof(regs))) { err = -EFAULT; break; } err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; - if (copy_to_user(uregs, ®s, sizeof regs)) + if (copy_to_user(uregs, ®s, sizeof(regs))) err = -EFAULT; break; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4d4df37922a..c0e0101133f3 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -56,17 +56,6 @@ asm (".pushsection .entry.text, \"ax\"\n" ".type _paravirt_nop, @function\n\t" ".popsection"); -/* identity function, which can be inlined */ -u32 notrace _paravirt_ident_32(u32 x) -{ - return x; -} - -u64 notrace _paravirt_ident_64(u64 x) -{ - return x; -} - void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -102,6 +91,12 @@ static unsigned paravirt_patch_call(void *insnbuf, const void *target, } #ifdef CONFIG_PARAVIRT_XXL +/* identity function, which can be inlined */ +u64 notrace _paravirt_ident_64(u64 x) +{ + return x; +} + static unsigned paravirt_patch_jmp(void *insnbuf, const void *target, unsigned long addr, unsigned len) { @@ -146,13 +141,11 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf, else if (opfunc == _paravirt_nop) ret = 0; +#ifdef CONFIG_PARAVIRT_XXL /* identity functions just return their single argument */ - else if (opfunc == _paravirt_ident_32) - ret = paravirt_patch_ident_32(insnbuf, len); else if (opfunc == _paravirt_ident_64) ret = paravirt_patch_ident_64(insnbuf, len); -#ifdef CONFIG_PARAVIRT_XXL else if (type == PARAVIRT_PATCH(cpu.iret) || type == PARAVIRT_PATCH(cpu.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -309,13 +302,8 @@ struct pv_info pv_info = { #endif }; -#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) -/* 32-bit pagetable entries */ -#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) -#else /* 64-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) -#endif struct paravirt_patch_template pv_ops = { /* Init ops. 
*/ @@ -483,5 +471,5 @@ NOKPROBE_SYMBOL(native_set_debugreg); NOKPROBE_SYMBOL(native_load_idt); #endif -EXPORT_SYMBOL_GPL(pv_ops); +EXPORT_SYMBOL(pv_ops); EXPORT_SYMBOL_GPL(pv_info); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 6368c22fa1fa..de138d3912e4 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -10,24 +10,18 @@ DEF_NATIVE(cpu, iret, "iret"); DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax"); -#endif - -#if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)"); -DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); -#endif - -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) -{ - /* arg in %eax, return in %eax */ - return 0; -} unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) { /* arg in %edx:%eax, return in %edx:%eax */ return 0; } +#endif + +#if defined(CONFIG_PARAVIRT_SPINLOCKS) +DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)"); +DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); +#endif extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 7ca9cb726f4d..9d9e04b31077 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -15,27 +15,19 @@ DEF_NATIVE(cpu, wbinvd, "wbinvd"); DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(cpu, swapgs, "swapgs"); -#endif - -DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)"); -DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); -#endif - -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) -{ - return paravirt_patch_insns(insnbuf, len, - start__mov32, end__mov32); -} - unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) { return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64); } +#endif + +#if defined(CONFIG_PARAVIRT_SPINLOCKS) +DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)"); +DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); +#endif extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7ba73fe0d917..f4562fcec681 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,7 +3,7 @@ #include <linux/dma-debug.h> #include <linux/dmar.h> #include <linux/export.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/gfp.h> #include <linux/pci.h> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 71c0b01d93b1..bd08b9e1c9e2 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -5,7 +5,7 @@ #include <linux/cache.h> #include <linux/init.h> #include <linux/swiotlb.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/dma-direct.h> #include <linux/mem_encrypt.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c93fcfdf1673..7d31192296a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -40,6 +40,8 @@ #include <asm/prctl.h> #include <asm/spec-ctrl.h> +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. 
The TSS size is kept cacheline-aligned @@ -252,11 +254,12 @@ void arch_setup_new_exec(void) enable_cpuid(); } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -395,32 +398,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +/* + * Update the MSRs managing speculation control, during context switch. + * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) { - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + unsigned long tif_diff = tifp ^ tifn; + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanentely + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } + + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - intel_set_ssb_state(tifn); + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { + /* Forced update. 
Make sure all relevant TIF flags are different */ preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -430,7 +486,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); @@ -451,8 +507,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 000000000000..898e97cf6629 --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +#include <asm/spec-ctrl.h> + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5046a3c9dec2..d3e593eb189f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,6 +59,8 @@ #include <asm/intel_rdt_sched.h> #include <asm/proto.h> +#include "process.h" + void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... 
printk() calls wake_up*() indirectly */ @@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 31b4755369f0..bbfbf017065c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ #include <asm/unistd_32_ia32.h> #endif +#include "process.h" + /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { @@ -553,7 +555,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -617,12 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload sp0. */ update_task_stack(next_p); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); #ifdef CONFIG_XEN_PV /* @@ -701,10 +697,10 @@ static void __set_personality_x32(void) current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* - * in_compat_syscall() uses the presence of the x32 syscall bit + * in_32bit_syscall() uses the presence of the x32 syscall bit * flag to determine compat status. The x86 mmap() code relies on * the syscall bitness so set x32 syscall bit right here to make - * in_compat_syscall() work during exec(). + * in_32bit_syscall() work during exec(). * * Pretend to come from a x32 execve. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d8f49c7384a3..ffae9b9740fd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1359,33 +1359,18 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -static void fill_sigtrap_info(struct task_struct *tsk, - struct pt_regs *regs, - int error_code, int si_code, - struct siginfo *info) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) { tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; - info->si_signo = SIGTRAP; - info->si_code = si_code; - info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; -} - -void user_single_step_siginfo(struct task_struct *tsk, - struct pt_regs *regs, - struct siginfo *info) -{ - fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); + /* Send us the fake SIGTRAP */ + force_sig_fault(SIGTRAP, si_code, + user_mode(regs) ? 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 637982efecd8..9b158b4716d2 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -20,7 +20,7 @@
 #include <linux/notifier.h>
 #include <linux/sched.h>
 #include <linux/gfp.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/nmi.h>
 
 #include <asm/fixmap.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 7005f89bf3b2..d494b9bfe618 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -30,7 +30,6 @@
 #include <linux/sfi.h>
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/console.h>
@@ -1281,23 +1280,6 @@ void __init setup_arch(char **cmdline_p)
 	unwind_init();
 }
 
-/*
- * From boot protocol 2.14 onwards we expect the bootloader to set the
- * version to "0x8000 | <used version>". In case we find a version >= 2.14
- * without the 0x8000 we assume the boot loader supports 2.13 only and
- * reset the version accordingly. The 0x8000 flag is removed in any case.
- */
-void __init x86_verify_bootdata_version(void)
-{
-	if (boot_params.hdr.version & VERSION_WRITTEN)
-		boot_params.hdr.version &= ~VERSION_WRITTEN;
-	else if (boot_params.hdr.version >= 0x020e)
-		boot_params.hdr.version = 0x020d;
-
-	if (boot_params.hdr.version < 0x020e)
-		boot_params.hdr.acpi_rsdp_addr = 0;
-}
-
 #ifdef CONFIG_X86_32
 
 static struct resource video_ram_resource = {
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ea554f812ee1..e8796fcd7e5a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/percpu.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
@@ -106,20 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 	void *ptr;
 
 	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = __alloc_bootmem_nopanic(size, align, goal);
+		ptr = memblock_alloc_from_nopanic(size, align, goal);
 		pr_info("cpu %d has no node %d or node-local memory\n",
 			cpu, node);
 		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));
 	} else {
-		ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
-						   size, align, goal);
+		ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
+						     MEMBLOCK_ALLOC_ACCESSIBLE,
+						     node);
+
 		pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
			 cpu, size, node, __pa(ptr));
 	}
 	return ptr;
 #else
-	return __alloc_bootmem_nopanic(size, align, goal);
+	return memblock_alloc_from_nopanic(size, align, goal);
 #endif
 }
 
@@ -133,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
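pcpu_alloc_bootmem() above prefers node-local memory and falls back when the node is offline or memoryless; memblock_alloc_try_nid_nopanic() performs a similar fallback internally. The shape of that pattern, reduced to a plain userspace sketch where node_alloc() and any_alloc() are hypothetical stand-ins, not kernel APIs:

#include <stdio.h>
#include <stdlib.h>

static void *node_alloc(int node, size_t size)
{
	/* Pretend node 1 is offline and has no local memory. */
	return (node == 1) ? NULL : malloc(size);
}

static void *any_alloc(size_t size)
{
	return malloc(size);
}

static void *alloc_pref_node(int node, size_t size)
{
	void *p = node_alloc(node, size);

	if (!p) {
		fprintf(stderr, "node %d has no local memory, falling back\n",
			node);
		p = any_alloc(size);
	}
	return p;
}

int main(void)
{
	void *a = alloc_pref_node(0, 64);	/* node-local succeeds */
	void *b = alloc_pref_node(1, 64);	/* falls back to any node */

	free(a);
	free(b);
	return 0;
}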
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5369d7fac797..a9134d1910b9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -49,7 +49,7 @@
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/percpu.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/nmi.h>
 #include <linux/tboot.h>
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 6a78d4b36a79..f7476ce23b6e 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -105,7 +105,7 @@ out:
 static void find_start_end(unsigned long addr, unsigned long flags,
 		unsigned long *begin, unsigned long *end)
 {
-	if (!in_compat_syscall() && (flags & MAP_32BIT)) {
+	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
 		/* This is usually used to map code in small
 		   model, so it needs to be in the first 31bit.
 		   Limit it to that.  This means we need to move the
@@ -122,7 +122,7 @@ static void find_start_end(unsigned long addr, unsigned long flags,
 	}
 
 	*begin	= get_mmap_base(1);
-	if (in_compat_syscall())
+	if (in_32bit_syscall())
 		*end = task_size_32bit();
 	else
 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
@@ -193,7 +193,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		return addr;
 
 	/* for MAP_32BIT mappings we force the legacy mmap base */
-	if (!in_compat_syscall() && (flags & MAP_32BIT))
+	if (!in_32bit_syscall() && (flags & MAP_32BIT))
 		goto bottomup;
 
 	/* requesting a specific address */
@@ -217,9 +217,10 @@ get_unmapped_area:
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
	 * in the full address space.
	 *
-	 * !in_compat_syscall() check to avoid high addresses for x32.
+	 * !in_32bit_syscall() check to avoid high addresses for x32
+	 * (and make it a no-op on native i386).
	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
	info.align_mask = 0;
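The in_32bit_syscall() checks above gate the MAP_32BIT handling, which forces mappings below 2^31 for the small code model. A hedged, Linux/x86-64-only userspace check of that behavior:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>

int main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("MAP_32BIT mapping at %p (below 2^31: %s)\n",
	       p, ((uintptr_t)p < (1UL << 31)) ? "yes" : "no");
	munmap(p, 4096);
	return 0;
}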
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index f386bad0984e..285aaa62d153 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -30,7 +30,7 @@
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/tce.h>
 #include <asm/calgary.h>
 #include <asm/proto.h>
@@ -173,7 +173,7 @@ void * __init alloc_tce_table(void)
 	size = table_size_to_number_of_entries(specified_table_size);
 	size *= TCE_ENTRY_SIZE;
 
-	return __alloc_bootmem_low(size, size, 0);
+	return memblock_alloc_low(size, size);
 }
 
 void __init free_tce_table(void *tbl)
@@ -186,5 +186,5 @@ void __init free_tce_table(void *tbl)
 	size = table_size_to_number_of_entries(specified_table_size);
 	size *= TCE_ENTRY_SIZE;
 
-	free_bootmem(__pa(tbl), size);
+	memblock_free(__pa(tbl), size);
 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5bd0a997d81e..9b7c4ca8f0a7 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -189,7 +189,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr)
 }
 
 static nokprobe_inline int
-do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
+do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
 		  struct pt_regs *regs, long error_code)
 {
 	if (v8086_mode(regs)) {
@@ -202,10 +202,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 				error_code, trapnr))
 			return 0;
 	}
-	return -1;
-	}
-
-	if (!user_mode(regs)) {
+	} else if (!user_mode(regs)) {
 		if (fixup_exception(regs, trapnr, error_code, 0))
 			return 0;
@@ -214,49 +211,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
 		die(str, regs, error_code);
 	}
 
-	return -1;
-}
-
-static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
-				 siginfo_t *info)
-{
-	unsigned long siaddr;
-	int sicode;
-
-	switch (trapnr) {
-	default:
-		return SEND_SIG_PRIV;
-
-	case X86_TRAP_DE:
-		sicode = FPE_INTDIV;
-		siaddr = uprobe_get_trap_addr(regs);
-		break;
-	case X86_TRAP_UD:
-		sicode = ILL_ILLOPN;
-		siaddr = uprobe_get_trap_addr(regs);
-		break;
-	case X86_TRAP_AC:
-		sicode = BUS_ADRALN;
-		siaddr = 0;
-		break;
-	}
-
-	info->si_signo = signr;
-	info->si_errno = 0;
-	info->si_code = sicode;
-	info->si_addr = (void __user *)siaddr;
-	return info;
-}
-
-static void
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
-	long error_code, siginfo_t *info)
-{
-	struct task_struct *tsk = current;
-
-
-	if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
-		return;
 	/*
 	 * We want error_code and trap_nr set for userspace faults and
 	 * kernelspace faults which result in die(), but not
@@ -269,24 +223,45 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = trapnr;
 
+	return -1;
+}
+
+static void show_signal(struct task_struct *tsk, int signr,
+			const char *type, const char *desc,
+			struct pt_regs *regs, long error_code)
+{
 	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 	    printk_ratelimit()) {
-		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
-			tsk->comm, tsk->pid, str,
+		pr_info("%s[%d] %s%s ip:%lx sp:%lx error:%lx",
+			tsk->comm, task_pid_nr(tsk), type, desc,
 			regs->ip, regs->sp, error_code);
 		print_vma_addr(KERN_CONT " in ", regs->ip);
 		pr_cont("\n");
 	}
+}
+
+static void
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+	long error_code, int sicode, void __user *addr)
+{
+	struct task_struct *tsk = current;
+
+
+	if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
+		return;
+
+	show_signal(tsk, signr, "trap ", str, regs, error_code);
 
-	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
+	if (!sicode)
+		force_sig(signr, tsk);
+	else
+		force_sig_fault(signr, sicode, addr, tsk);
 }
 NOKPROBE_SYMBOL(do_trap);
 
 static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
-			  unsigned long trapnr, int signr)
+			  unsigned long trapnr, int signr, int sicode, void __user *addr)
 {
-	siginfo_t info;
-
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 
 	/*
@@ -299,26 +274,26 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
 			NOTIFY_STOP) {
 		cond_local_irq_enable(regs);
-		clear_siginfo(&info);
-		do_trap(trapnr, signr, str, regs, error_code,
-			fill_trap_info(regs, signr, trapnr, &info));
+		do_trap(trapnr, signr, str, regs, error_code, sicode, addr);
 	}
 }
 
-#define DO_ERROR(trapnr, signr, str, name)				\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
-{									\
-	do_error_trap(regs, error_code, str, trapnr, signr);		\
+#define IP ((void __user *)uprobe_get_trap_addr(regs))
+#define DO_ERROR(trapnr, signr, sicode, addr, str, name)		   \
+dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	   \
+{									   \
+	do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \
 }
 
-DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error)
-DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
-DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
-DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
-DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
-DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
-DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
-DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  FPE_INTDIV,   IP, "divide error",        divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, 0,            NULL, "overflow",          overflow)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  ILL_ILLOPN,   IP, "invalid opcode",      invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  0,            NULL, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, 0,            NULL, "invalid TSS",       invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  0,            NULL, "segment not present", segment_not_present)
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  0,            NULL, "stack segment",     stack_segment)
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  BUS_ADRALN,   NULL, "alignment check",   alignment_check)
+#undef IP
 
 #ifdef CONFIG_VMAP_STACK
 __visible void __noreturn handle_stack_overflow(const char *message,
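The DO_ERROR() table above generates one trap handler per line through token pasting, now also threading the si_code and fault address into do_error_trap(). The generation pattern, reduced to a standalone sketch in which the trap numbers and handler body are made up:

#include <stdio.h>

#define DO_ERROR(trapnr, str, name)                        \
static void do_##name(long error_code)                     \
{                                                          \
	printf("trap %d (%s), error_code=%ld\n",           \
	       trapnr, str, error_code);                   \
}

DO_ERROR(0, "divide error",   divide_error)
DO_ERROR(6, "invalid opcode", invalid_op)

int main(void)
{
	do_divide_error(0);	/* calls the macro-generated handler */
	do_invalid_op(0);
	return 0;
}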
@@ -331,7 +306,7 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 	die(message, regs, 0);
 
 	/* Be absolutely certain we don't return. */
-	panic(message);
+	panic("%s", message);
 }
 #endif
 
@@ -459,7 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 {
 	const struct mpx_bndcsr *bndcsr;
-	siginfo_t *info;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
@@ -497,8 +471,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 			goto exit_trap;
 		break;	/* Success, it was handled */
 	case 1: /* Bound violation. */
-		info = mpx_generate_siginfo(regs);
-		if (IS_ERR(info)) {
+	{
+		struct task_struct *tsk = current;
+		struct mpx_fault_info mpx;
+
+		if (mpx_fault_info(&mpx, regs)) {
 			/*
 			 * We failed to decode the MPX instruction.  Act as if
 			 * the exception was not caused by MPX.
@@ -507,14 +484,20 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 		}
 		/*
 		 * Success, we decoded the instruction and retrieved
-		 * an 'info' containing the address being accessed
+		 * an 'mpx' containing the address being accessed
 		 * which caused the exception. This information
 		 * allows an application to possibly handle the
 		 * #BR exception itself.
 		 */
-		do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info);
-		kfree(info);
+		if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs,
+				       error_code))
+			break;
+
+		show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code);
+
+		force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper);
 		break;
+	}
 	case 0: /* No exception caused by Intel MPX operations. */
 		goto exit_trap;
 	default:
@@ -531,12 +514,13 @@ exit_trap:
 	 * up here if the kernel has MPX turned off at compile
 	 * time..
 	 */
-	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
+	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
 }
 
 dotraplinkage void
 do_general_protection(struct pt_regs *regs, long error_code)
 {
+	const char *desc = "general protection fault";
 	struct task_struct *tsk;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
@@ -570,25 +554,18 @@ do_general_protection(struct pt_regs *regs, long error_code)
 		    kprobe_fault_handler(regs, X86_TRAP_GP))
 			return;
 
-		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+		if (notify_die(DIE_GPF, desc, regs, error_code,
 			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
-			die("general protection fault", regs, error_code);
+			die(desc, regs, error_code);
 		return;
 	}
 
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
 
-	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-	    printk_ratelimit()) {
-		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
-			tsk->comm, task_pid_nr(tsk),
-			regs->ip, regs->sp, error_code);
-		print_vma_addr(KERN_CONT " in ", regs->ip);
-		pr_cont("\n");
-	}
+	show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
 
-	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
+	force_sig(SIGSEGV, tsk);
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
@@ -631,7 +608,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 		goto exit;
 	cond_local_irq_enable(regs);
-	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
+	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, 0, NULL);
 	cond_local_irq_disable(regs);
 
 exit:
@@ -845,7 +822,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
 	struct task_struct *task = current;
 	struct fpu *fpu = &task->thread.fpu;
-	siginfo_t info;
+	int si_code;
 	char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
 						"simd exception";
@@ -871,18 +848,14 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	task->thread.trap_nr	= trapnr;
 	task->thread.error_code = error_code;
 
-	clear_siginfo(&info);
-	info.si_signo		= SIGFPE;
-	info.si_errno		= 0;
-	info.si_addr		= (void __user *)uprobe_get_trap_addr(regs);
-
-	info.si_code = fpu__exception_code(fpu, trapnr);
+	si_code = fpu__exception_code(fpu, trapnr);
 
 	/* Retry when we get spurious exceptions: */
-	if (!info.si_code)
+	if (!si_code)
 		return;
 
-	force_sig_info(SIGFPE, &info, task);
+	force_sig_fault(SIGFPE, si_code,
+			(void __user *)uprobe_get_trap_addr(regs), task);
 }
 
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
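With the conversions above, a divide error reaches userspace as SIGFPE with si_code FPE_INTDIV and si_addr pointing at the faulting instruction. A hedged x86-Linux illustration; integer division by zero is undefined behavior in C, and printf()/exit() are not async-signal-safe, so this is demo code only:

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>

static void on_fpe(int sig, siginfo_t *si, void *uctx)
{
	(void)sig;
	(void)uctx;
	printf("SIGFPE: si_code=%d (FPE_INTDIV=%d) si_addr=%p\n",
	       si->si_code, FPE_INTDIV, si->si_addr);
	exit(0);	/* returning would re-execute the faulting divide */
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = on_fpe, .sa_flags = SA_SIGINFO };
	volatile int zero = 0;

	sigaction(SIGFPE, &sa, NULL);
	return 1 / zero;
}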
"fpu exception" : "simd exception"; @@ -871,18 +848,14 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.trap_nr = trapnr; task->thread.error_code = error_code; - clear_siginfo(&info); - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - - info.si_code = fpu__exception_code(fpu, trapnr); + si_code = fpu__exception_code(fpu, trapnr); /* Retry when we get spurious exceptions: */ - if (!info.si_code) + if (!si_code) return; - force_sig_info(SIGFPE, &info, task); + force_sig_fault(SIGFPE, si_code, + (void __user *)uprobe_get_trap_addr(regs), task); } dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) @@ -942,20 +915,13 @@ NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { - siginfo_t info; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); - clear_siginfo(&info); - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_BADSTK; - info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + ILL_BADSTK, (void __user *)NULL); } } #endif diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index ff20b35e98dd..f8f3cfda01ae 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -271,19 +271,13 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, */ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) { - siginfo_t info; struct task_struct *tsk = current; tsk->thread.cr2 = (unsigned long)addr; tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; tsk->thread.trap_nr = X86_TRAP_PF; - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = addr; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk); if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index deb576b23b7c..843feb94a950 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -1086,7 +1086,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + force_sig(SIGSEGV, current); } return -1; diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 1eae5af491c2..891a75dbc131 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,65 +26,8 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL -/* - * Interrupt control on vSMPowered systems: - * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' - * and vice versa. 
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 1eae5af491c2..891a75dbc131 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,65 +26,8 @@
 
 #define TOPOLOGY_REGISTER_OFFSET 0x10
 
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL
-/*
- * Interrupt control on vSMPowered systems:
- * ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
- * and vice versa.
- */
-
-asmlinkage __visible unsigned long vsmp_save_fl(void)
-{
-	unsigned long flags = native_save_fl();
-
-	if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC))
-		flags &= ~X86_EFLAGS_IF;
-	return flags;
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
-
-__visible void vsmp_restore_fl(unsigned long flags)
-{
-	if (flags & X86_EFLAGS_IF)
-		flags &= ~X86_EFLAGS_AC;
-	else
-		flags |= X86_EFLAGS_AC;
-	native_restore_fl(flags);
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
-
-asmlinkage __visible void vsmp_irq_disable(void)
-{
-	unsigned long flags = native_save_fl();
-
-	native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
-
-asmlinkage __visible void vsmp_irq_enable(void)
-{
-	unsigned long flags = native_save_fl();
-
-	native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
-
-static unsigned __init vsmp_patch(u8 type, void *ibuf,
-				  unsigned long addr, unsigned len)
-{
-	switch (type) {
-	case PARAVIRT_PATCH(irq.irq_enable):
-	case PARAVIRT_PATCH(irq.irq_disable):
-	case PARAVIRT_PATCH(irq.save_fl):
-	case PARAVIRT_PATCH(irq.restore_fl):
-		return paravirt_patch_default(type, ibuf, addr, len);
-	default:
-		return native_patch(type, ibuf, addr, len);
-	}
-
-}
-
-static void __init set_vsmp_pv_ops(void)
+#ifdef CONFIG_PCI
+static void __init set_vsmp_ctl(void)
 {
 	void __iomem *address;
 	unsigned int cap, ctl, cfg;
@@ -109,28 +52,12 @@ static void __init set_vsmp_pv_ops(void)
 	}
 #endif
 
-	if (cap & ctl & (1 << 4)) {
-		/* Setup irq ops and turn on vSMP IRQ fastpath handling */
-		pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
-		pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
-		pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
-		pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
-		pv_ops.init.patch = vsmp_patch;
-		ctl &= ~(1 << 4);
-	}
 	writel(ctl, address + 4);
 	ctl = readl(address + 4);
 	pr_info("vSMP CTL: control set to:0x%08x\n", ctl);
 
 	early_iounmap(address, 8);
 }
-#else
-static void __init set_vsmp_pv_ops(void)
-{
-}
-#endif
-
-#ifdef CONFIG_PCI
 static int is_vsmp = -1;
 
 static void __init detect_vsmp_box(void)
@@ -164,11 +91,14 @@ static int is_vsmp_box(void)
 {
 	return 0;
 }
+static void __init set_vsmp_ctl(void)
+{
+}
 #endif
 
 static void __init vsmp_cap_cpus(void)
 {
-#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP)
+#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
 	void __iomem *address;
 	unsigned int cfg, topology, node_shift, maxcpus;
 
@@ -221,6 +151,6 @@ void __init vsmp_init(void)
 
 	vsmp_cap_cpus();
 
-	set_vsmp_pv_ops();
+	set_vsmp_ctl();
 	return;
 }
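The removed vsmp_save_fl() treated ~AC as a shadow of IF, clearing IF in the saved flags whenever AC was set. Its bit logic, re-expressed as a pure function so the invariant can be checked in userspace; the EFLAGS bit positions are the real x86 ones, everything else is illustrative:

#include <stdio.h>

#define X86_EFLAGS_IF (1UL << 9)
#define X86_EFLAGS_AC (1UL << 18)

static unsigned long vsmp_effective_flags(unsigned long flags)
{
	/* IF is only honored when AC, its inverted shadow, is clear. */
	if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC))
		flags &= ~X86_EFLAGS_IF;
	return flags;
}

int main(void)
{
	printf("IF only: IF=%d\n",
	       !!(vsmp_effective_flags(X86_EFLAGS_IF) & X86_EFLAGS_IF));
	printf("IF + AC: IF=%d\n",
	       !!(vsmp_effective_flags(X86_EFLAGS_IF | X86_EFLAGS_AC) &
		  X86_EFLAGS_IF));
	return 0;
}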