summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- arch/arm64/include/asm/cputype.h | 2
-rw-r--r-- arch/arm64/include/asm/insn.h | 1
-rw-r--r-- arch/arm64/include/asm/spectre.h | 3
-rw-r--r-- arch/arm64/kernel/proton-pack.c | 13
-rw-r--r-- arch/arm64/lib/insn.c | 60
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c | 57
-rw-r--r-- arch/x86/kernel/alternative.c | 6
-rw-r--r-- drivers/platform/x86/amd/hsmp/acpi.c | 3
-rw-r--r-- drivers/platform/x86/amd/hsmp/hsmp.h | 1
-rw-r--r-- drivers/platform/x86/amd/hsmp/plat.c | 6
-rw-r--r-- drivers/platform/x86/amd/pmc/pmc-quirks.c | 7
-rw-r--r-- drivers/platform/x86/amd/pmf/tee-if.c | 23
-rw-r--r-- drivers/platform/x86/asus-wmi.c | 3
-rw-r--r-- drivers/platform/x86/thinkpad_acpi.c | 2
-rw-r--r-- fs/buffer.c | 4
-rw-r--r-- fs/eventpoll.c | 7
-rw-r--r-- fs/udf/truncate.c | 2
-rw-r--r-- fs/xattr.c | 24
-rw-r--r-- kernel/cgroup/cpuset.c | 6
-rw-r--r-- kernel/sched/ext.c | 191
-rw-r--r-- kernel/sched/ext_idle.c | 2
-rw-r--r-- kernel/trace/fprobe.c | 3
-rw-r--r-- kernel/trace/trace_dynevent.c | 16
-rw-r--r-- kernel/trace/trace_dynevent.h | 1
-rw-r--r-- kernel/trace/trace_eprobe.c | 3
-rw-r--r-- kernel/trace/trace_kprobe.c | 2
-rw-r--r-- kernel/trace/trace_probe.c | 9
-rw-r--r-- kernel/trace/trace_uprobe.c | 2
-rw-r--r-- mm/swapfile.c | 9
29 files changed, 353 insertions, 115 deletions
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index d1cc0571798b..dffff6763812 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -81,6 +81,7 @@
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -168,6 +169,7 @@
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 39577f1d079a..18c7811774d3 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -706,6 +706,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type);
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg);
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index f1524cdeacf1..8fef12626090 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -97,6 +97,9 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
+extern bool __nospectre_bhb;
+u8 get_spectre_bhb_loop_value(void);
+bool is_spectre_bhb_fw_mitigated(void);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index b607f6dfc5e6..edf1783ffc81 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -891,6 +891,7 @@ static u8 spectre_bhb_loop_affected(void)
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
@@ -999,6 +1000,11 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
return true;
}
+u8 get_spectre_bhb_loop_value(void)
+{
+ return max_bhb_k;
+}
+
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
{
const char *v = arm64_get_bp_hardening_vector(slot);
@@ -1016,7 +1022,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
isb();
}
-static bool __read_mostly __nospectre_bhb;
+bool __read_mostly __nospectre_bhb;
static int __init parse_spectre_bhb_param(char *str)
{
__nospectre_bhb = true;
@@ -1094,6 +1100,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
update_mitigation_state(&spectre_bhb_state, state);
}
+bool is_spectre_bhb_fw_mitigated(void)
+{
+ return test_bit(BHB_FW, &system_bhb_mitigations);
+}
+
/* Patched to NOP when enabled */
void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
__le32 *origptr,
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 9bef696e2230..4e298baddc2e 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/printk.h>
@@ -1500,43 +1501,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
}
-u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type)
{
- u32 opt;
- u32 insn;
-
switch (type) {
case AARCH64_INSN_MB_SY:
- opt = 0xf;
- break;
+ return 0xf;
case AARCH64_INSN_MB_ST:
- opt = 0xe;
- break;
+ return 0xe;
case AARCH64_INSN_MB_LD:
- opt = 0xd;
- break;
+ return 0xd;
case AARCH64_INSN_MB_ISH:
- opt = 0xb;
- break;
+ return 0xb;
case AARCH64_INSN_MB_ISHST:
- opt = 0xa;
- break;
+ return 0xa;
case AARCH64_INSN_MB_ISHLD:
- opt = 0x9;
- break;
+ return 0x9;
case AARCH64_INSN_MB_NSH:
- opt = 0x7;
- break;
+ return 0x7;
case AARCH64_INSN_MB_NSHST:
- opt = 0x6;
- break;
+ return 0x6;
case AARCH64_INSN_MB_NSHLD:
- opt = 0x5;
- break;
+ return 0x5;
default:
- pr_err("%s: unknown dmb type %d\n", __func__, type);
+ pr_err("%s: unknown barrier type %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
+}
+
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+{
+ u32 opt;
+ u32 insn;
+
+ opt = __get_barrier_crm_val(type);
+ if (opt == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
insn = aarch64_insn_get_dmb_value();
insn &= ~GENMASK(11, 8);
@@ -1545,6 +1544,21 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
return insn;
}
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type)
+{
+ u32 opt, insn;
+
+ opt = __get_barrier_crm_val(type);
+ if (opt == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
+
+ insn = aarch64_insn_get_dsb_base_value();
+ insn &= ~GENMASK(11, 8);
+ insn |= (opt << 8);
+
+ return insn;
+}
+
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg)
{
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 70d7c89d3ac9..634d78422adb 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "bpf_jit: " fmt
+#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -17,6 +18,7 @@
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/text-patching.h>
@@ -939,7 +941,51 @@ static void build_plt(struct jit_ctx *ctx)
plt->target = (u64)&dummy_tramp;
}
-static void build_epilogue(struct jit_ctx *ctx)
+/* Clobbers BPF registers 1-4, aka x0-x3 */
+static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
+{
+ const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
+ u8 k = get_spectre_bhb_loop_value();
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+ cpu_mitigations_off() || __nospectre_bhb ||
+ arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
+ return;
+
+ if (capable(CAP_SYS_ADMIN))
+ return;
+
+ if (supports_clearbhb(SCOPE_SYSTEM)) {
+ emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
+ return;
+ }
+
+ if (k) {
+ emit_a64_mov_i64(r1, k, ctx);
+ emit(A64_B(1), ctx);
+ emit(A64_SUBS_I(true, r1, r1, 1), ctx);
+ emit(A64_B_(A64_COND_NE, -2), ctx);
+ emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
+ emit(aarch64_insn_get_isb_value(), ctx);
+ }
+
+ if (is_spectre_bhb_fw_mitigated()) {
+ emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
+ ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ emit(aarch64_insn_get_hvc_value(), ctx);
+ break;
+ case SMCCC_CONDUIT_SMC:
+ emit(aarch64_insn_get_smc_value(), ctx);
+ break;
+ default:
+ pr_err_once("Firmware mitigation enabled with unknown conduit\n");
+ }
+ }
+}
+
+static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
{
const u8 r0 = bpf2a64[BPF_REG_0];
const u8 ptr = bpf2a64[TCCNT_PTR];
@@ -952,10 +998,13 @@ static void build_epilogue(struct jit_ctx *ctx)
emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
+ if (was_classic)
+ build_bhb_mitigation(ctx);
+
/* Restore FP/LR registers */
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
- /* Set return value */
+ /* Move the return value from bpf:r0 (aka x7) to x0 */
emit(A64_MOV(1, A64_R(0), r0), ctx);
/* Authenticate lr */
@@ -1898,7 +1947,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.epilogue_offset = ctx.idx;
- build_epilogue(&ctx);
+ build_epilogue(&ctx, was_classic);
build_plt(&ctx);
extable_align = __alignof__(struct exception_table_entry);
@@ -1961,7 +2010,7 @@ skip_init_ctx:
goto out_free_hdr;
}
- build_epilogue(&ctx);
+ build_epilogue(&ctx, was_classic);
build_plt(&ctx);
/* Extra pass to validate JITed code. */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 48fd04e90114..45bcff181cba 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -133,7 +133,9 @@ static bool cfi_paranoid __ro_after_init;
#ifdef CONFIG_MITIGATION_ITS
+#ifdef CONFIG_MODULES
static struct module *its_mod;
+#endif
static void *its_page;
static unsigned int its_offset;
@@ -171,6 +173,7 @@ static void *its_init_thunk(void *thunk, int reg)
return thunk + offset;
}
+#ifdef CONFIG_MODULES
void its_init_mod(struct module *mod)
{
if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
@@ -209,6 +212,7 @@ void its_free_mod(struct module *mod)
}
kfree(mod->its_page_array);
}
+#endif /* CONFIG_MODULES */
static void *its_alloc(void)
{
@@ -217,6 +221,7 @@ static void *its_alloc(void)
if (!page)
return NULL;
+#ifdef CONFIG_MODULES
if (its_mod) {
void *tmp = krealloc(its_mod->its_page_array,
(its_mod->its_num_pages+1) * sizeof(void *),
@@ -229,6 +234,7 @@ static void *its_alloc(void)
execmem_make_temp_rw(page, PAGE_SIZE);
}
+#endif /* CONFIG_MODULES */
return no_free_ptr(page);
}
diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c
index c1eccb3c80c5..eaae044e4f82 100644
--- a/drivers/platform/x86/amd/hsmp/acpi.c
+++ b/drivers/platform/x86/amd/hsmp/acpi.c
@@ -27,9 +27,8 @@
#include "hsmp.h"
-#define DRIVER_NAME "amd_hsmp"
+#define DRIVER_NAME "hsmp_acpi"
#define DRIVER_VERSION "2.3"
-#define ACPI_HSMP_DEVICE_HID "AMDI0097"
/* These are the strings specified in ACPI table */
#define MSG_IDOFF_STR "MsgIdOffset"
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h
index af8b21f821d6..d58d4f0c20d5 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.h
+++ b/drivers/platform/x86/amd/hsmp/hsmp.h
@@ -23,6 +23,7 @@
#define HSMP_CDEV_NAME "hsmp_cdev"
#define HSMP_DEVNODE_NAME "hsmp"
+#define ACPI_HSMP_DEVICE_HID "AMDI0097"
struct hsmp_mbaddr_info {
u32 base_addr;
diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c
index b9782a078dbd..81931e808bbc 100644
--- a/drivers/platform/x86/amd/hsmp/plat.c
+++ b/drivers/platform/x86/amd/hsmp/plat.c
@@ -11,6 +11,7 @@
#include <asm/amd_hsmp.h>
+#include <linux/acpi.h>
#include <linux/build_bug.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -266,7 +267,7 @@ static bool legacy_hsmp_support(void)
}
case 0x1A:
switch (boot_cpu_data.x86_model) {
- case 0x00 ... 0x1F:
+ case 0x00 ... 0x0F:
return true;
default:
return false;
@@ -288,6 +289,9 @@ static int __init hsmp_plt_init(void)
return ret;
}
+ if (acpi_dev_present(ACPI_HSMP_DEVICE_HID, NULL, -1))
+ return -ENODEV;
+
hsmp_pdev = get_hsmp_pdev();
if (!hsmp_pdev)
return -ENOMEM;
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index b4f49720c87f..2e3f6fc67c56 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -217,6 +217,13 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
}
},
+ {
+ .ident = "MECHREVO Wujie 14X (GX4HRXL)",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"),
+ }
+ },
{}
};
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index 14b99d8b63d2..d3bd12ad036a 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -334,6 +334,11 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
return 0;
}
+static inline bool amd_pmf_pb_valid(struct amd_pmf_dev *dev)
+{
+ return memchr_inv(dev->policy_buf, 0xff, dev->policy_sz);
+}
+
#ifdef CONFIG_AMD_PMF_DEBUG
static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev)
{
@@ -361,12 +366,22 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf,
dev->policy_buf = new_policy_buf;
dev->policy_sz = length;
+ if (!amd_pmf_pb_valid(dev)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
amd_pmf_hex_dump_pb(dev);
ret = amd_pmf_start_policy_engine(dev);
if (ret < 0)
- return ret;
+ goto cleanup;
return length;
+
+cleanup:
+ kfree(dev->policy_buf);
+ dev->policy_buf = NULL;
+ return ret;
}
static const struct file_operations pb_fops = {
@@ -528,6 +543,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz);
+ if (!amd_pmf_pb_valid(dev)) {
+ dev_info(dev->dev, "No Smart PC policy present\n");
+ ret = -EINVAL;
+ goto err_free_policy;
+ }
+
amd_pmf_hex_dump_pb(dev);
dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 0c697b46f436..47cc766624d7 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -4779,7 +4779,8 @@ static int asus_wmi_add(struct platform_device *pdev)
goto fail_leds;
asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
- if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+ if ((result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) ==
+ (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
asus->driver->wlan_ctrl_by_user = 1;
if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 5790095c175e..92b21e49faf6 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -11478,6 +11478,8 @@ static int __must_check __init get_thinkpad_model_data(
tp->vendor = PCI_VENDOR_ID_IBM;
else if (dmi_name_in_vendors("LENOVO"))
tp->vendor = PCI_VENDOR_ID_LENOVO;
+ else if (dmi_name_in_vendors("NEC"))
+ tp->vendor = PCI_VENDOR_ID_LENOVO;
else
return 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index 7be23ff20b27..7ba1807145aa 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1220,10 +1220,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
/* FIXME: do we need to set this in both places? */
if (bh->b_folio && bh->b_folio->mapping)
mapping_set_error(bh->b_folio->mapping, -EIO);
- if (bh->b_assoc_map) {
+ if (bh->b_assoc_map)
mapping_set_error(bh->b_assoc_map, -EIO);
- errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
- }
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4bc264b854c4..d4dbffdedd08 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2111,9 +2111,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
write_unlock_irq(&ep->lock);
- if (!eavail && ep_schedule_timeout(to))
- timed_out = !schedule_hrtimeout_range(to, slack,
- HRTIMER_MODE_ABS);
+ if (!eavail)
+ timed_out = !ep_schedule_timeout(to) ||
+ !schedule_hrtimeout_range(to, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 4f33a4a48886..b4071c9cf8c9 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -115,7 +115,7 @@ void udf_truncate_tail_extent(struct inode *inode)
}
/* This inode entry is in-memory only and thus we don't have to mark
* the inode dirty */
- if (ret == 0)
+ if (ret >= 0)
iinfo->i_lenExtents = inode->i_size;
brelse(epos.bh);
}
diff --git a/fs/xattr.c b/fs/xattr.c
index fabb2a04501e..8ec5b0204bfd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1428,6 +1428,15 @@ static bool xattr_is_trusted(const char *name)
return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
+static bool xattr_is_maclabel(const char *name)
+{
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+
+ return !strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN) &&
+ security_ismaclabel(suffix);
+}
+
/**
* simple_xattr_list - list all xattr objects
* @inode: inode from which to get the xattrs
@@ -1460,6 +1469,17 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
return err;
+ err = security_inode_listsecurity(inode, buffer, remaining_size);
+ if (err < 0)
+ return err;
+
+ if (buffer) {
+ if (remaining_size < err)
+ return -ERANGE;
+ buffer += err;
+ }
+ remaining_size -= err;
+
read_lock(&xattrs->lock);
for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
xattr = rb_entry(rbp, struct simple_xattr, rb_node);
@@ -1468,6 +1488,10 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (!trusted && xattr_is_trusted(xattr->name))
continue;
+ /* skip MAC labels; these are provided by LSM above */
+ if (xattr_is_maclabel(xattr->name))
+ continue;
+
err = xattr_list_one(&buffer, &remaining_size, xattr->name);
if (err)
break;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 306b60430091..24b70ea3e6ce 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1116,9 +1116,11 @@ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
if (top_cs) {
/*
- * Percpu kthreads in top_cpuset are ignored
+ * PF_NO_SETAFFINITY tasks are ignored.
+ * All per cpu kthreads should have PF_NO_SETAFFINITY
+ * flag set, see kthread_set_per_cpu().
*/
- if (kthread_is_per_cpu(task))
+ if (task->flags & PF_NO_SETAFFINITY)
continue;
cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus);
} else {
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index fdbf249d1c68..f5133249fd4d 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1118,8 +1118,38 @@ static void scx_kf_disallow(u32 mask)
current->scx.kf_mask &= ~mask;
}
-#define SCX_CALL_OP(mask, op, args...) \
+/*
+ * Track the rq currently locked.
+ *
+ * This allows kfuncs to safely operate on rq from any scx ops callback,
+ * knowing which rq is already locked.
+ */
+static DEFINE_PER_CPU(struct rq *, locked_rq);
+
+static inline void update_locked_rq(struct rq *rq)
+{
+ /*
+ * Check whether @rq is actually locked. This can help expose bugs
+ * or incorrect assumptions about the context in which a kfunc or
+ * callback is executed.
+ */
+ if (rq)
+ lockdep_assert_rq_held(rq);
+ __this_cpu_write(locked_rq, rq);
+}
+
+/*
+ * Return the rq currently locked from an scx callback, or NULL if no rq is
+ * locked.
+ */
+static inline struct rq *scx_locked_rq(void)
+{
+ return __this_cpu_read(locked_rq);
+}
+
+#define SCX_CALL_OP(mask, op, rq, args...) \
do { \
+ update_locked_rq(rq); \
if (mask) { \
scx_kf_allow(mask); \
scx_ops.op(args); \
@@ -1127,11 +1157,14 @@ do { \
} else { \
scx_ops.op(args); \
} \
+ update_locked_rq(NULL); \
} while (0)
-#define SCX_CALL_OP_RET(mask, op, args...) \
+#define SCX_CALL_OP_RET(mask, op, rq, args...) \
({ \
__typeof__(scx_ops.op(args)) __ret; \
+ \
+ update_locked_rq(rq); \
if (mask) { \
scx_kf_allow(mask); \
__ret = scx_ops.op(args); \
@@ -1139,6 +1172,7 @@ do { \
} else { \
__ret = scx_ops.op(args); \
} \
+ update_locked_rq(NULL); \
__ret; \
})
@@ -1153,31 +1187,31 @@ do { \
* scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
* the specific task.
*/
-#define SCX_CALL_OP_TASK(mask, op, task, args...) \
+#define SCX_CALL_OP_TASK(mask, op, rq, task, args...) \
do { \
BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
current->scx.kf_tasks[0] = task; \
- SCX_CALL_OP(mask, op, task, ##args); \
+ SCX_CALL_OP(mask, op, rq, task, ##args); \
current->scx.kf_tasks[0] = NULL; \
} while (0)
-#define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \
+#define SCX_CALL_OP_TASK_RET(mask, op, rq, task, args...) \
({ \
__typeof__(scx_ops.op(task, ##args)) __ret; \
BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
current->scx.kf_tasks[0] = task; \
- __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \
+ __ret = SCX_CALL_OP_RET(mask, op, rq, task, ##args); \
current->scx.kf_tasks[0] = NULL; \
__ret; \
})
-#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \
+#define SCX_CALL_OP_2TASKS_RET(mask, op, rq, task0, task1, args...) \
({ \
__typeof__(scx_ops.op(task0, task1, ##args)) __ret; \
BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
current->scx.kf_tasks[0] = task0; \
current->scx.kf_tasks[1] = task1; \
- __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \
+ __ret = SCX_CALL_OP_RET(mask, op, rq, task0, task1, ##args); \
current->scx.kf_tasks[0] = NULL; \
current->scx.kf_tasks[1] = NULL; \
__ret; \
@@ -2172,7 +2206,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
WARN_ON_ONCE(*ddsp_taskp);
*ddsp_taskp = p;
- SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags);
*ddsp_taskp = NULL;
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2269,7 +2303,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
add_nr_running(rq, 1);
if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p))
- SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_REST, runnable, rq, p, enq_flags);
if (enq_flags & SCX_ENQ_WAKEUP)
touch_core_sched(rq, p);
@@ -2283,7 +2317,7 @@ out:
__scx_add_event(SCX_EV_SELECT_CPU_FALLBACK, 1);
}
-static void ops_dequeue(struct task_struct *p, u64 deq_flags)
+static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
{
unsigned long opss;
@@ -2304,7 +2338,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
BUG();
case SCX_OPSS_QUEUED:
if (SCX_HAS_OP(dequeue))
- SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, rq, p, deq_flags);
if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
SCX_OPSS_NONE))
@@ -2337,7 +2371,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
return true;
}
- ops_dequeue(p, deq_flags);
+ ops_dequeue(rq, p, deq_flags);
/*
* A currently running task which is going off @rq first gets dequeued
@@ -2353,11 +2387,11 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
*/
if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
update_curr_scx(rq);
- SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
+ SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, false);
}
if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p))
- SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, rq, p, deq_flags);
if (deq_flags & SCX_DEQ_SLEEP)
p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
@@ -2377,7 +2411,7 @@ static void yield_task_scx(struct rq *rq)
struct task_struct *p = rq->curr;
if (SCX_HAS_OP(yield))
- SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
+ SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, p, NULL);
else
p->scx.slice = 0;
}
@@ -2387,7 +2421,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
struct task_struct *from = rq->curr;
if (SCX_HAS_OP(yield))
- return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
+ return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, from, to);
else
return false;
}
@@ -2945,7 +2979,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
* emitted in switch_class().
*/
if (SCX_HAS_OP(cpu_acquire))
- SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL);
+ SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL);
rq->scx.cpu_released = false;
}
@@ -2990,7 +3024,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
do {
dspc->nr_tasks = 0;
- SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq),
+ SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, rq, cpu_of(rq),
prev_on_scx ? prev : NULL);
flush_dispatch_buf(rq);
@@ -3104,7 +3138,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
* Core-sched might decide to execute @p before it is
* dispatched. Call ops_dequeue() to notify the BPF scheduler.
*/
- ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC);
+ ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC);
dispatch_dequeue(rq, p);
}
@@ -3112,7 +3146,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
/* see dequeue_task_scx() on why we skip when !QUEUED */
if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
- SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
+ SCX_CALL_OP_TASK(SCX_KF_REST, running, rq, p);
clr_task_runnable(p, true);
@@ -3193,8 +3227,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
.task = next,
};
- SCX_CALL_OP(SCX_KF_CPU_RELEASE,
- cpu_release, cpu_of(rq), &args);
+ SCX_CALL_OP(SCX_KF_CPU_RELEASE, cpu_release, rq, cpu_of(rq), &args);
}
rq->scx.cpu_released = true;
}
@@ -3207,7 +3240,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
/* see dequeue_task_scx() on why we skip when !QUEUED */
if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
- SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
+ SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, true);
if (p->scx.flags & SCX_TASK_QUEUED) {
set_task_runnable(rq, p);
@@ -3348,7 +3381,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
* verifier.
*/
if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a)))
- return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before,
+ return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, NULL,
(struct task_struct *)a,
(struct task_struct *)b);
else
@@ -3385,7 +3418,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
*ddsp_taskp = p;
cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
- select_cpu, p, prev_cpu, wake_flags);
+ select_cpu, NULL, p, prev_cpu, wake_flags);
p->scx.selected_cpu = cpu;
*ddsp_taskp = NULL;
if (ops_cpu_valid(cpu, "from ops.select_cpu()"))
@@ -3430,8 +3463,8 @@ static void set_cpus_allowed_scx(struct task_struct *p,
* designation pointless. Cast it away when calling the operation.
*/
if (SCX_HAS_OP(set_cpumask))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
- (struct cpumask *)p->cpus_ptr);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, NULL,
+ p, (struct cpumask *)p->cpus_ptr);
}
static void handle_hotplug(struct rq *rq, bool online)
@@ -3444,9 +3477,9 @@ static void handle_hotplug(struct rq *rq, bool online)
scx_idle_update_selcpu_topology(&scx_ops);
if (online && SCX_HAS_OP(cpu_online))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, NULL, cpu);
else if (!online && SCX_HAS_OP(cpu_offline))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, NULL, cpu);
else
scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
"cpu %d going %s, exiting scheduler", cpu,
@@ -3550,7 +3583,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
curr->scx.slice = 0;
touch_core_sched(rq, curr);
} else if (SCX_HAS_OP(tick)) {
- SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr);
+ SCX_CALL_OP_TASK(SCX_KF_REST, tick, rq, curr);
}
if (!curr->scx.slice)
@@ -3627,7 +3660,7 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool
.fork = fork,
};
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args);
+ ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, NULL, p, &args);
if (unlikely(ret)) {
ret = ops_sanitize_err("init_task", ret);
return ret;
@@ -3668,9 +3701,10 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool
static void scx_ops_enable_task(struct task_struct *p)
{
+ struct rq *rq = task_rq(p);
u32 weight;
- lockdep_assert_rq_held(task_rq(p));
+ lockdep_assert_rq_held(rq);
/*
* Set the weight before calling ops.enable() so that the scheduler
@@ -3684,20 +3718,22 @@ static void scx_ops_enable_task(struct task_struct *p)
p->scx.weight = sched_weight_to_cgroup(weight);
if (SCX_HAS_OP(enable))
- SCX_CALL_OP_TASK(SCX_KF_REST, enable, p);
+ SCX_CALL_OP_TASK(SCX_KF_REST, enable, rq, p);
scx_set_task_state(p, SCX_TASK_ENABLED);
if (SCX_HAS_OP(set_weight))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight);
}
static void scx_ops_disable_task(struct task_struct *p)
{
- lockdep_assert_rq_held(task_rq(p));
+ struct rq *rq = task_rq(p);
+
+ lockdep_assert_rq_held(rq);
WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
if (SCX_HAS_OP(disable))
- SCX_CALL_OP_TASK(SCX_KF_REST, disable, p);
+ SCX_CALL_OP_TASK(SCX_KF_REST, disable, rq, p);
scx_set_task_state(p, SCX_TASK_READY);
}
@@ -3726,7 +3762,7 @@ static void scx_ops_exit_task(struct task_struct *p)
}
if (SCX_HAS_OP(exit_task))
- SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args);
+ SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, task_rq(p), p, &args);
scx_set_task_state(p, SCX_TASK_NONE);
}
@@ -3835,7 +3871,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p,
p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
if (SCX_HAS_OP(set_weight))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight);
}
static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
@@ -3851,8 +3887,8 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
* different scheduler class. Keep the BPF scheduler up-to-date.
*/
if (SCX_HAS_OP(set_cpumask))
- SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
- (struct cpumask *)p->cpus_ptr);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, rq,
+ p, (struct cpumask *)p->cpus_ptr);
}
static void switched_from_scx(struct rq *rq, struct task_struct *p)
@@ -3913,7 +3949,7 @@ int scx_tg_online(struct task_group *tg)
struct scx_cgroup_init_args args =
{ .weight = tg->scx_weight };
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
+ ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL,
tg->css.cgroup, &args);
if (ret)
ret = ops_sanitize_err("cgroup_init", ret);
@@ -3935,7 +3971,7 @@ void scx_tg_offline(struct task_group *tg)
percpu_down_read(&scx_cgroup_rwsem);
if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, tg->css.cgroup);
tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
percpu_up_read(&scx_cgroup_rwsem);
@@ -3968,7 +4004,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
continue;
if (SCX_HAS_OP(cgroup_prep_move)) {
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move,
+ ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, NULL,
p, from, css->cgroup);
if (ret)
goto err;
@@ -3982,8 +4018,8 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
err:
cgroup_taskset_for_each(p, css, tset) {
if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
- p->scx.cgrp_moving_from, css->cgroup);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL,
+ p, p->scx.cgrp_moving_from, css->cgroup);
p->scx.cgrp_moving_from = NULL;
}
@@ -4001,8 +4037,8 @@ void scx_cgroup_move_task(struct task_struct *p)
* cgrp_moving_from set.
*/
if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
- SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p,
- p->scx.cgrp_moving_from, tg_cgrp(task_group(p)));
+ SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, NULL,
+ p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p)));
p->scx.cgrp_moving_from = NULL;
}
@@ -4021,8 +4057,8 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
cgroup_taskset_for_each(p, css, tset) {
if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
- p->scx.cgrp_moving_from, css->cgroup);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL,
+ p, p->scx.cgrp_moving_from, css->cgroup);
p->scx.cgrp_moving_from = NULL;
}
out_unlock:
@@ -4035,7 +4071,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
if (scx_cgroup_enabled && tg->scx_weight != weight) {
if (SCX_HAS_OP(cgroup_set_weight))
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight,
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
tg_cgrp(tg), weight);
tg->scx_weight = weight;
}
@@ -4224,7 +4260,7 @@ static void scx_cgroup_exit(void)
continue;
rcu_read_unlock();
- SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, css->cgroup);
rcu_read_lock();
css_put(css);
@@ -4261,7 +4297,7 @@ static int scx_cgroup_init(void)
continue;
rcu_read_unlock();
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
+ ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL,
css->cgroup, &args);
if (ret) {
css_put(css);
@@ -4758,7 +4794,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
}
if (scx_ops.exit)
- SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, exit, NULL, ei);
cancel_delayed_work_sync(&scx_watchdog_work);
@@ -4965,7 +5001,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
if (SCX_HAS_OP(dump_task)) {
ops_dump_init(s, " ");
- SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p);
+ SCX_CALL_OP(SCX_KF_REST, dump_task, NULL, dctx, p);
ops_dump_exit();
}
@@ -5012,7 +5048,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
if (SCX_HAS_OP(dump)) {
ops_dump_init(&s, "");
- SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx);
+ SCX_CALL_OP(SCX_KF_UNLOCKED, dump, NULL, &dctx);
ops_dump_exit();
}
@@ -5069,7 +5105,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
used = seq_buf_used(&ns);
if (SCX_HAS_OP(dump_cpu)) {
ops_dump_init(&ns, " ");
- SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle);
+ SCX_CALL_OP(SCX_KF_REST, dump_cpu, NULL, &dctx, cpu, idle);
ops_dump_exit();
}
@@ -5328,7 +5364,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
scx_idle_enable(ops);
if (scx_ops.init) {
- ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
+ ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init, NULL);
if (ret) {
ret = ops_sanitize_err("init", ret);
cpus_read_unlock();
@@ -6791,6 +6827,12 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) !=
__alignof__(struct bpf_iter_scx_dsq));
+ /*
+ * next() and destroy() will be called regardless of the return value.
+ * Always clear $kit->dsq.
+ */
+ kit->dsq = NULL;
+
if (flags & ~__SCX_DSQ_ITER_USER_FLAGS)
return -EINVAL;
@@ -7077,13 +7119,32 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf)
}
if (ops_cpu_valid(cpu, NULL)) {
- struct rq *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq();
+ struct rq_flags rf;
+
+ /*
+ * When called with an rq lock held, restrict the operation
+ * to the corresponding CPU to prevent ABBA deadlocks.
+ */
+ if (locked_rq && rq != locked_rq) {
+ scx_ops_error("Invalid target CPU %d", cpu);
+ return;
+ }
+
+ /*
+ * If no rq lock is held, allow operating on any CPU by
+ * acquiring the corresponding rq lock.
+ */
+ if (!locked_rq) {
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
+ }
rq->scx.cpuperf_target = perf;
+ cpufreq_update_util(rq, 0);
- rcu_read_lock_sched_notrace();
- cpufreq_update_util(cpu_rq(cpu), 0);
- rcu_read_unlock_sched_notrace();
+ if (!locked_rq)
+ rq_unlock_irqrestore(rq, &rf);
}
}
@@ -7314,12 +7375,6 @@ BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids)
BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
-BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle)
-BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_cpu_rq)
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index cb343ca889e0..e67a19a071c1 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -674,7 +674,7 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
* managed by put_prev_task_idle()/set_next_task_idle().
*/
if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
- SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
+ SCX_CALL_OP(SCX_KF_REST, update_idle, rq, cpu_of(rq), idle);
/*
* Update the idle masks:
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 95c6e3473a76..ba7ff14f5339 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -454,7 +454,8 @@ static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *
struct fprobe_hlist_node *node;
int ret = 0;
- hlist_for_each_entry_rcu(node, head, hlist) {
+ hlist_for_each_entry_rcu(node, head, hlist,
+ lockdep_is_held(&fprobe_mutex)) {
if (!within_module(node->addr, mod))
continue;
if (delete_fprobe_node(node))
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index a322e4f249a5..5d64a18cacac 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -16,7 +16,7 @@
#include "trace_output.h" /* for trace_event_sem */
#include "trace_dynevent.h"
-static DEFINE_MUTEX(dyn_event_ops_mutex);
+DEFINE_MUTEX(dyn_event_ops_mutex);
static LIST_HEAD(dyn_event_ops_list);
bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
@@ -116,6 +116,20 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
return ret;
}
+/*
+ * Locked version of event creation. The event creation must be protected by
+ * dyn_event_ops_mutex because that mutex protects trace_probe_log.
+ */
+int dyn_event_create(const char *raw_command, struct dyn_event_operations *type)
+{
+ int ret;
+
+ mutex_lock(&dyn_event_ops_mutex);
+ ret = type->create(raw_command);
+ mutex_unlock(&dyn_event_ops_mutex);
+ return ret;
+}
+
static int create_dyn_event(const char *raw_command)
{
struct dyn_event_operations *ops;
diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h
index 936477a111d3..beee3f8d7544 100644
--- a/kernel/trace/trace_dynevent.h
+++ b/kernel/trace/trace_dynevent.h
@@ -100,6 +100,7 @@ void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos);
void dyn_event_seq_stop(struct seq_file *m, void *v);
int dyn_events_release_all(struct dyn_event_operations *type);
int dyn_event_release(const char *raw_command, struct dyn_event_operations *type);
+int dyn_event_create(const char *raw_command, struct dyn_event_operations *type);
/*
* for_each_dyn_event - iterate over the dyn_event list
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index c08355c3ef32..916555f0de81 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -969,10 +969,13 @@ static int __trace_eprobe_create(int argc, const char *argv[])
goto error;
}
}
+ trace_probe_log_clear();
return ret;
+
parse_error:
ret = -EINVAL;
error:
+ trace_probe_log_clear();
trace_event_probe_cleanup(ep);
return ret;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2703b96d8990..3e5c47b6d7b2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1089,7 +1089,7 @@ static int create_or_delete_trace_kprobe(const char *raw_command)
if (raw_command[0] == '-')
return dyn_event_release(raw_command, &trace_kprobe_ops);
- ret = trace_kprobe_create(raw_command);
+ ret = dyn_event_create(raw_command, &trace_kprobe_ops);
return ret == -ECANCELED ? -EINVAL : ret;
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2eeecb6c95ee..424751cdf31f 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -154,9 +154,12 @@ fail:
}
static struct trace_probe_log trace_probe_log;
+extern struct mutex dyn_event_ops_mutex;
void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
{
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
trace_probe_log.subsystem = subsystem;
trace_probe_log.argc = argc;
trace_probe_log.argv = argv;
@@ -165,11 +168,15 @@ void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
void trace_probe_log_clear(void)
{
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
memset(&trace_probe_log, 0, sizeof(trace_probe_log));
}
void trace_probe_log_set_index(int index)
{
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
trace_probe_log.index = index;
}
@@ -178,6 +185,8 @@ void __trace_probe_log_err(int offset, int err_type)
char *command, *p;
int i, len = 0, pos = 0;
+ lockdep_assert_held(&dyn_event_ops_mutex);
+
if (!trace_probe_log.argv)
return;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 3386439ec9f6..35cf76c75dd7 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -741,7 +741,7 @@ static int create_or_delete_trace_uprobe(const char *raw_command)
if (raw_command[0] == '-')
return dyn_event_release(raw_command, &trace_uprobe_ops);
- ret = trace_uprobe_create(raw_command);
+ ret = dyn_event_create(raw_command, &trace_uprobe_ops);
return ret == -ECANCELED ? -EINVAL : ret;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f214843612dc..412ccd6543b3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3332,6 +3332,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/*
+ * The swap subsystem needs a major overhaul to support this.
+ * It doesn't work yet so just disable it for now.
+ */
+ if (mapping_min_folio_order(mapping) > 0) {
+ error = -EINVAL;
+ goto bad_swap_unlock_inode;
+ }
+
+ /*
* Read the swap header.
*/
if (!mapping->a_ops->read_folio) {