From da142f3d373a6ddaca0119615a8db2175ddc4121 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 5 Dec 2025 15:26:55 -0800 Subject: KVM: Remove subtle "struct kvm_stats_desc" pseudo-overlay Remove KVM's internal pseudo-overlay of kvm_stats_desc, which subtly aliases the flexible name[] in the uAPI definition with a fixed-size array of the same name. The unusual embedded structure results in compiler warnings due to -Wflex-array-member-not-at-end, and also necessitates an extra level of dereferencing in KVM. To avoid the "overlay", define the uAPI structure to have a fixed-size name when building for the kernel. Opportunistically clean up the indentation for the stats macros, and replace spaces with tabs. No functional change intended. Reported-by: Gustavo A. R. Silva Closes: https://lore.kernel.org/all/aPfNKRpLfhmhYqfP@kspp Acked-by: Marc Zyngier Acked-by: Christian Borntraeger [..] Acked-by: Anup Patel Reviewed-by: Bibo Mao Acked-by: Gustavo A. R. Silva Link: https://patch.msgid.link/20251205232655.445294-1-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dddb781b0507..76bd54848b11 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -14,6 +14,10 @@ #include #include +#ifdef __KERNEL__ +#include +#endif + #define KVM_API_VERSION 12 /* @@ -1579,7 +1583,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) -- cgit v1.2.3 From 3b68df978133ac3d46d570af065a73debbb68248 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 20 Feb 2026 15:06:41 -0500 Subject: rseq: slice ext: Ensure rseq feature size differs from original rseq size Before rseq became extensible, its original size was 32 bytes even though the active rseq area was only 20 bytes. This had the following impact in terms of userspace ecosystem evolution: * The GNU libc between 2.35 and 2.39 expose a __rseq_size symbol set to 32, even though the size of the active rseq area is really 20. * The GNU libc 2.40 changes this __rseq_size to 20, thus making it express the active rseq area. * Starting from glibc 2.41, __rseq_size corresponds to the AT_RSEQ_FEATURE_SIZE from getauxval(3). This means that users of __rseq_size can always expect it to correspond to the active rseq area, except for the value 32, for which the active rseq area is 20 bytes. Exposing a 32 bytes feature size would make life needlessly painful for userspace. Therefore, add a reserved field at the end of the rseq area to bump the feature size to 33 bytes. This reserved field is expected to be replaced with whatever field will come next, expecting that this field will be larger than 1 byte. The effect of this change is to increase the size from 32 to 64 bytes before we actually have fields using that memory. Clarify the allocation size and alignment requirements in the struct rseq uapi comment. Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the value of the active rseq area size rounded up to next power of 2, which guarantees that the rseq structure will always be aligned on the nearest power of two large enough to contain it, even as it grows. Change the alignment check in the rseq registration accordingly. This will minimize the amount of ABI corner-cases we need to document and require userspace to play games with. The rule stays simple when __rseq_size != 32: #define rseq_field_available(field) (__rseq_size >= offsetofend(struct rseq_abi, field)) Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com --- fs/binfmt_elf.c | 3 ++- include/linux/rseq.h | 12 ++++++++++++ include/uapi/linux/rseq.h | 26 ++++++++++++++++++++++---- kernel/rseq.c | 3 ++- 4 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8e89cc5b2820..fb857faaf0d6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } #ifdef CONFIG_RSEQ NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end)); - NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq)); + NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align()); #endif #undef NEW_AUX_ENT /* AT_NULL is zero; clear the rest too */ diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 7a01a0760405..b9d62fc2140d 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags) t->rseq = current->rseq; } +/* + * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq + * registration. This is the active rseq area size rounded up to next + * power of 2, which guarantees that the rseq structure will always be + * aligned on the nearest power of two large enough to contain it, even + * as it grows. + */ +static inline unsigned int rseq_alloc_align(void) +{ + return 1U << get_count_order(offsetof(struct rseq, end)); +} + #else /* CONFIG_RSEQ */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 863c4a00a66b..f69344fe6c08 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -87,10 +87,17 @@ struct rseq_slice_ctrl { }; /* - * struct rseq is aligned on 4 * 8 bytes to ensure it is always - * contained within a single cache-line. + * The original size and alignment of the allocation for struct rseq is + * 32 bytes. * - * A single struct rseq per thread is allowed. + * The allocation size needs to be greater or equal to + * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to + * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32). + * + * As an alternative, userspace is allowed to use both the original size + * and alignment of 32 bytes for backward compatibility. + * + * A single active struct rseq registration per thread is allowed. */ struct rseq { /* @@ -180,10 +187,21 @@ struct rseq { */ struct rseq_slice_ctrl slice_ctrl; + /* + * Before rseq became extensible, its original size was 32 bytes even + * though the active rseq area was only 20 bytes. + * Exposing a 32 bytes feature size would make life needlessly painful + * for userspace. Therefore, add a reserved byte after byte 32 + * to bump the rseq feature size from 32 to 33. + * The next field to be added to the rseq area will be larger + * than one byte, and will replace this reserved byte. + */ + __u8 __reserved; + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(32))); #endif /* _UAPI_LINUX_RSEQ_H */ diff --git a/kernel/rseq.c b/kernel/rseq.c index e349f86cc945..38d3ef540760 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -456,7 +457,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 */ if (rseq_len < ORIG_RSEQ_SIZE || (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || - (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || rseq_len < offsetof(struct rseq, end)))) return -EINVAL; if (!access_ok(rseq, rseq_len)) -- cgit v1.2.3 From 39195990e4c093c9eecf88f29811c6de29265214 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Feb 2026 06:10:08 -0600 Subject: PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fb82437fdd8c ("PCI: Change capability register offsets to hex") incorrectly converted the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value from decimal 52 to hex 0x32: -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ This broke PCI capabilities in a VMM because subsequent ones weren't DWORD-aligned. Change PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to the correct value of 0x34. fb82437fdd8c was from Baruch Siach , but this was not Baruch's fault; it's a mistake I made when applying the patch. Fixes: fb82437fdd8c ("PCI: Change capability register offsets to hex") Reported-by: David Woodhouse Closes: https://lore.kernel.org/all/3ae392a0158e9d9ab09a1d42150429dd8ca42791.camel@infradead.org Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof WilczyƄski --- include/uapi/linux/pci_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ec1c54b5a310..14f634ab9350 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -712,7 +712,7 @@ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ #define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x34 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ -- cgit v1.2.3 From 0ed2e8bf61d6d5df1d78f4e24b682dff4c394e17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 28 Feb 2026 04:56:20 -0700 Subject: io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG Sync with a recent liburing fix, which corrects the comment explaining when the IORING_SETUP_TASKRUN_FLAG setup flag is valid to use. May be use with COOP_TASKRUN or DEFER_TASKRUN, not useful without either of this task_work mechanisms being used. Link: https://github.com/axboe/liburing/pull/1543 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6750c383a2ab..1ff16141c8a5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -188,7 +188,8 @@ enum io_uring_sqe_flags_bit { /* * If COOP_TASKRUN is set, get notified if task work is available for * running and a kernel transition would be needed to run it. This sets - * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN + * or DEFER_TASKRUN. */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ -- cgit v1.2.3 From 2619da73bb2f10d88f7e1087125c40144fdf0987 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 Mar 2026 20:49:55 +0100 Subject: KVM: x86: Use __DECLARE_FLEX_ARRAY() for UAPI structures with VLAs Commit 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") broke the userspace API for C++. These structures ending in VLAs are typically a *header*, which can be followed by an arbitrary number of entries. Userspace typically creates a larger structure with some non-zero number of entries, for example in QEMU's kvm_arch_get_supported_msr_feature(): struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; } msr_data = {}; While that works in C, it fails in C++ with an error like: flexible array member 'kvm_msrs::entries' not at end of 'struct msr_data' Fix this by using __DECLARE_FLEX_ARRAY() for the VLA, which uses [0] for C++ compilation. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Cc: stable@vger.kernel.org Signed-off-by: David Woodhouse Link: https://patch.msgid.link/3abaf6aefd6e5efeff3b860ac38421d9dec908db.camel@infradead.org [sean: tag for stable@] Signed-off-by: Sean Christopherson --- arch/x86/include/uapi/asm/kvm.h | 12 ++++++------ include/uapi/linux/kvm.h | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0d4538fa6c31..5f2b30d0405c 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -197,13 +197,13 @@ struct kvm_msrs { __u32 nmsrs; /* number of msrs in entries */ __u32 pad; - struct kvm_msr_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_msr_entry, entries); }; /* for KVM_GET_MSR_INDEX_LIST */ struct kvm_msr_list { __u32 nmsrs; /* number of msrs in entries */ - __u32 indices[]; + __DECLARE_FLEX_ARRAY(__u32, indices); }; /* Maximum size of any access bitmap in bytes */ @@ -245,7 +245,7 @@ struct kvm_cpuid_entry { struct kvm_cpuid { __u32 nent; __u32 padding; - struct kvm_cpuid_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_cpuid_entry, entries); }; struct kvm_cpuid_entry2 { @@ -267,7 +267,7 @@ struct kvm_cpuid_entry2 { struct kvm_cpuid2 { __u32 nent; __u32 padding; - struct kvm_cpuid_entry2 entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_cpuid_entry2, entries); }; /* for KVM_GET_PIT and KVM_SET_PIT */ @@ -398,7 +398,7 @@ struct kvm_xsave { * the contents of CPUID leaf 0xD on the host. */ __u32 region[1024]; - __u32 extra[]; + __DECLARE_FLEX_ARRAY(__u32, extra); }; #define KVM_MAX_XCRS 16 @@ -566,7 +566,7 @@ struct kvm_pmu_event_filter { __u32 fixed_counter_bitmap; __u32 flags; __u32 pad[4]; - __u64 events[]; + __DECLARE_FLEX_ARRAY(__u64, events); }; #define KVM_PMU_EVENT_ALLOW 0 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80364d4dbebb..3f0d8d3c3daf 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -542,7 +543,7 @@ struct kvm_coalesced_mmio { struct kvm_coalesced_mmio_ring { __u32 first, last; - struct kvm_coalesced_mmio coalesced_mmio[]; + __DECLARE_FLEX_ARRAY(struct kvm_coalesced_mmio, coalesced_mmio); }; #define KVM_COALESCED_MMIO_MAX \ @@ -592,7 +593,7 @@ struct kvm_clear_dirty_log { /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; - __u8 sigset[]; + __DECLARE_FLEX_ARRAY(__u8, sigset); }; /* for KVM_TPR_ACCESS_REPORTING */ @@ -1051,7 +1052,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing { __u32 nr; __u32 flags; - struct kvm_irq_routing_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_irq_routing_entry, entries); }; #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) @@ -1142,7 +1143,7 @@ struct kvm_dirty_tlb { struct kvm_reg_list { __u64 n; /* number of regs */ - __u64 reg[]; + __DECLARE_FLEX_ARRAY(__u64, reg); }; struct kvm_one_reg { @@ -1608,7 +1609,7 @@ struct kvm_stats_desc { #ifdef __KERNEL__ char name[KVM_STATS_NAME_SIZE]; #else - char name[]; + __DECLARE_FLEX_ARRAY(char, name); #endif }; -- cgit v1.2.3 From 8f15b5071b4548b0aafc03b366eb45c9c6566704 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 25 Mar 2026 14:11:08 +0100 Subject: netfilter: ctnetlink: use netlink policy range checks Replace manual range and mask validations with netlink policy annotations in ctnetlink code paths, so that the netlink core rejects invalid values early and can generate extack errors. - CTA_PROTOINFO_TCP_STATE: reject values > TCP_CONNTRACK_SYN_SENT2 at policy level, removing the manual >= TCP_CONNTRACK_MAX check. - CTA_PROTOINFO_TCP_WSCALE_ORIGINAL/REPLY: reject values > TCP_MAX_WSCALE (14). The normal TCP option parsing path already clamps to this value, but the ctnetlink path accepted 0-255, causing undefined behavior when used as a u32 shift count. - CTA_FILTER_ORIG_FLAGS/REPLY_FLAGS: use NLA_POLICY_MASK with CTA_FILTER_F_ALL, removing the manual mask checks. - CTA_EXPECT_FLAGS: use NLA_POLICY_MASK with NF_CT_EXPECT_MASK, adding a new mask define grouping all valid expect flags. Extracted from a broader nf-next patch by Florian Westphal, scoped to ctnetlink for the fixes tree. Fixes: c8e2078cfe41 ("[NETFILTER]: ctnetlink: add support for internal tcp connection tracking flags handling") Signed-off-by: David Carlier Co-developed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 4 ++++ net/netfilter/nf_conntrack_netlink.c | 16 +++++----------- net/netfilter/nf_conntrack_proto_tcp.c | 10 +++------- 3 files changed, 12 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 26071021e986..56b6b60a814f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -159,5 +159,9 @@ enum ip_conntrack_expect_events { #define NF_CT_EXPECT_INACTIVE 0x2 #define NF_CT_EXPECT_USERSPACE 0x4 +#ifdef __KERNEL__ +#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \ + NF_CT_EXPECT_USERSPACE) +#endif #endif /* _UAPI_NF_CONNTRACK_COMMON_H */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6e6aeb0ab0a1..3f408f3713bb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -910,8 +910,8 @@ struct ctnetlink_filter { }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { - [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, - [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), + [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, @@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr, if (ret) return ret; - if (tb[CTA_FILTER_ORIG_FLAGS]) { + if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); - if (filter->orig_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } - if (tb[CTA_FILTER_REPLY_FLAGS]) { + if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); - if (filter->reply_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } return 0; } @@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0c1d086e96cb..b67426c2189b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1385,9 +1385,9 @@ nla_put_failure: } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { - [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2), + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; @@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; - if (tb[CTA_PROTOINFO_TCP_STATE] && - nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) - return -EINVAL; - spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); -- cgit v1.2.3 From 45065a5095c7773fb98c35d60c20c3b513540597 Mon Sep 17 00:00:00 2001 From: Akshai Murari Date: Fri, 27 Mar 2026 06:54:45 +0000 Subject: Input: add keycodes for contextual AI usages (HUTRR119) HUTRR119 introduces new usages for keys intended to invoke AI agents based on the current context. These are useful with the increasing number of operating systems with integrated Large Language Models Add new key definitions for KEY_ACTION_ON_SELECTION, KEY_CONTEXTUAL_INSERT and KEY_CONTEXTUAL_QUERY Signed-off-by: Akshai Murari Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 6 ++++++ drivers/hid/hid-input.c | 3 +++ include/uapi/linux/input-event-codes.h | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c5865b0d2aaa..a8d2b36a7852 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -990,6 +990,9 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0x0c, 0x01c9, "ALContactSync" }, { 0x0c, 0x01ca, "ALNavigation" }, { 0x0c, 0x01cb, "ALContextawareDesktopAssistant" }, + { 0x0c, 0x01cc, "ALActionOnSelection" }, + { 0x0c, 0x01cd, "ALContextualInsertion" }, + { 0x0c, 0x01ce, "ALContextualQuery" }, { 0x0c, 0x0200, "GenericGUIApplicationControls" }, { 0x0c, 0x0201, "ACNew" }, { 0x0c, 0x0202, "ACOpen" }, @@ -3375,6 +3378,9 @@ static const char *keys[KEY_MAX + 1] = { [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", + [KEY_ACTION_ON_SELECTION] = "ActionOnSelection", + [KEY_CONTEXTUAL_INSERT] = "ContextualInsert", + [KEY_CONTEXTUAL_QUERY] = "ContextualQuery", [KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev", [KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext", [KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup", diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 8fc20df99b97..ce9c3251287d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1227,6 +1227,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; case 0x1cb: map_key_clear(KEY_ASSISTANT); break; + case 0x1cc: map_key_clear(KEY_ACTION_ON_SELECTION); break; + case 0x1cd: map_key_clear(KEY_CONTEXTUAL_INSERT); break; + case 0x1ce: map_key_clear(KEY_CONTEXTUAL_QUERY); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4bdb6a165987..3528168f7c6d 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -643,6 +643,10 @@ #define KEY_EPRIVACY_SCREEN_ON 0x252 #define KEY_EPRIVACY_SCREEN_OFF 0x253 +#define KEY_ACTION_ON_SELECTION 0x254 /* AL Action on Selection (HUTRR119) */ +#define KEY_CONTEXTUAL_INSERT 0x255 /* AL Contextual Insertion (HUTRR119) */ +#define KEY_CONTEXTUAL_QUERY 0x256 /* AL Contextual Query (HUTRR119) */ + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 -- cgit v1.2.3 From 08ee1559052be302f1d3752f48360b89517d9f8d Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: prctl: cfi: change the branch landing pad prctl()s to be more descriptive Per Linus' comments requesting the replacement of "INDIR_BR_LP" in the indirect branch tracking prctl()s with something more readable, and suggesting the use of the speculation control prctl()s as an exemplar, reimplement the prctl()s and related constants that control per-task forward-edge control flow integrity. This primarily involves two changes. First, the prctls are restructured to resemble the style of the speculative execution workaround control prctls PR_{GET,SET}_SPECULATION_CTRL, to make them easier to extend in the future. Second, the "indir_br_lp" abbrevation is expanded to "branch_landing_pads" to be less telegraphic. The kselftest and documentation is adjusted accordingly. Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Cc: Linus Torvalds Cc: Mark Brown Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/zicfilp.rst | 63 +++++++++++++--------- arch/riscv/kernel/usercfi.c | 15 +++--- include/uapi/linux/prctl.h | 37 ++++++------- kernel/sys.c | 23 ++++---- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 36 ++++++------- tools/testing/selftests/riscv/cfi/cfitests.c | 4 +- 6 files changed, 91 insertions(+), 87 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/arch/riscv/zicfilp.rst b/Documentation/arch/riscv/zicfilp.rst index 78a3e01ff68c..ab7d8e62ddaf 100644 --- a/Documentation/arch/riscv/zicfilp.rst +++ b/Documentation/arch/riscv/zicfilp.rst @@ -76,34 +76,49 @@ the program. 4. prctl() enabling -------------------- -:c:macro:`PR_SET_INDIR_BR_LP_STATUS` / :c:macro:`PR_GET_INDIR_BR_LP_STATUS` / -:c:macro:`PR_LOCK_INDIR_BR_LP_STATUS` are three prctls added to manage indirect -branch tracking. These prctls are architecture-agnostic and return -EINVAL if -the underlying functionality is not supported. +Per-task indirect branch tracking state can be monitored and +controlled via the :c:macro:`PR_GET_CFI` and :c:macro:`PR_SET_CFI` +``prctl()` arguments (respectively), by supplying +:c:macro:`PR_CFI_BRANCH_LANDING_PADS` as the second argument. These +are architecture-agnostic, and will return -EINVAL if the underlying +functionality is not supported. -* prctl(PR_SET_INDIR_BR_LP_STATUS, unsigned long arg) +* prctl(:c:macro:`PR_SET_CFI`, :c:macro:`PR_CFI_BRANCH_LANDING_PADS`, unsigned long arg) -If arg1 is :c:macro:`PR_INDIR_BR_LP_ENABLE` and if CPU supports -``zicfilp`` then the kernel will enable indirect branch tracking for the -task. The dynamic loader can issue this :c:macro:`prctl` once it has -determined that all the objects loaded in the address space support -indirect branch tracking. Additionally, if there is a `dlopen` to an -object which wasn't compiled with ``zicfilp``, the dynamic loader can -issue this prctl with arg1 set to 0 (i.e. :c:macro:`PR_INDIR_BR_LP_ENABLE` -cleared). - -* prctl(PR_GET_INDIR_BR_LP_STATUS, unsigned long * arg) +arg is a bitmask. -Returns the current status of indirect branch tracking. If enabled -it'll return :c:macro:`PR_INDIR_BR_LP_ENABLE` - -* prctl(PR_LOCK_INDIR_BR_LP_STATUS, unsigned long arg) +If :c:macro:`PR_CFI_ENABLE` is set in arg, and the CPU supports +``zicfilp``, then the kernel will enable indirect branch tracking for +the task. The dynamic loader can issue this ``prctl()`` once it has +determined that all the objects loaded in the address space support +indirect branch tracking. + +Indirect branch tracking state can also be locked once enabled. This +prevents the task from subsequently disabling it. This is done by +setting the bit :c:macro:`PR_CFI_LOCK` in arg. Either indirect branch +tracking must already be enabled for the task, or the bit +:c:macro:`PR_CFI_ENABLE` must also be set in arg. This is intended +for environments that wish to run with a strict security posture that +do not wish to load objects without ``zicfilp`` support. + +Indirect branch tracking can also be disabled for the task, assuming +that it has not previously been enabled and locked. If there is a +``dlopen()`` to an object which wasn't compiled with ``zicfilp``, the +dynamic loader can issue this ``prctl()`` with arg set to +:c:macro:`PR_CFI_DISABLE`. Disabling indirect branch tracking for the +task is not possible if it has previously been enabled and locked. + + +* prctl(:c:macro:`PR_GET_CFI`, :c:macro:`PR_CFI_BRANCH_LANDING_PADS`, unsigned long * arg) + +Returns the current status of indirect branch tracking into a bitmask +stored into the memory location pointed to by arg. The bitmask will +have the :c:macro:`PR_CFI_ENABLE` bit set if indirect branch tracking +is currently enabled for the task, and if it is locked, will +additionally have the :c:macro:`PR_CFI_LOCK` bit set. If indirect +branch tracking is currently disabled for the task, the +:c:macro:`PR_CFI_DISABLE` bit will be set. -Locks the current status of indirect branch tracking on the task. User -space may want to run with a strict security posture and wouldn't want -loading of objects without ``zicfilp`` support in them, to disallow -disabling of indirect branch tracking. In this case, user space can -use this prctl to lock the current settings. 5. violations related to indirect branch tracking -------------------------------------------------- diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 04ab1eb8df29..2c535737511d 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -465,16 +465,14 @@ int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, if (!is_user_lpad_enabled()) return -EINVAL; - /* indirect branch tracking is enabled on the task or not */ - fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); + fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE); + fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0); return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; } int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { - bool enable_indir_lp = false; - if (!is_user_lpad_enabled()) return -EINVAL; @@ -482,12 +480,13 @@ int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long if (is_indir_lp_locked(t)) return -EINVAL; - /* Reject unknown flags */ - if (state & ~PR_INDIR_BR_LP_ENABLE) + if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE))) + return -EINVAL; + + if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE) return -EINVAL; - enable_indir_lp = (state & PR_INDIR_BR_LP_ENABLE); - set_indir_lp_status(t, enable_indir_lp); + set_indir_lp_status(t, !!(state & PR_CFI_ENABLE)); return 0; } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 55b0446fff9d..b6ec6f693719 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -397,30 +397,23 @@ struct prctl_mm_map { # define PR_RSEQ_SLICE_EXT_ENABLE 0x01 /* - * Get the current indirect branch tracking configuration for the current - * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + * Get or set the control flow integrity (CFI) configuration for the + * current thread. + * + * Some per-thread control flow integrity settings are not yet + * controlled through this prctl(); see for example + * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS */ -#define PR_GET_INDIR_BR_LP_STATUS 80 - +#define PR_GET_CFI 80 +#define PR_SET_CFI 81 /* - * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will - * enable cpu feature for user thread, to track all indirect branches and ensure - * they land on arch defined landing pad instruction. - * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. - * arch64 - If enabled, an indirect branch must land on a BTI instruction. - * riscv - If enabled, an indirect branch must land on an lpad instruction. - * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect - * branches will no more be tracked by cpu to land on arch defined landing pad - * instruction. - */ -#define PR_SET_INDIR_BR_LP_STATUS 81 -# define PR_INDIR_BR_LP_ENABLE (1UL << 0) - -/* - * Prevent further changes to the specified indirect branch tracking - * configuration. All bits may be locked via this call, including - * undefined bits. + * Forward-edge CFI variants (excluding ARM64 BTI, which has its own + * prctl()s). */ -#define PR_LOCK_INDIR_BR_LP_STATUS 82 +#define PR_CFI_BRANCH_LANDING_PADS 0 +/* Return and control values for PR_{GET,SET}_CFI */ +# define PR_CFI_ENABLE _BITUL(0) +# define PR_CFI_DISABLE _BITUL(1) +# define PR_CFI_LOCK _BITUL(2) #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index a5e0c187bbbf..62e842055cc9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2889,20 +2889,23 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = rseq_slice_extension_prctl(arg2, arg3); break; - case PR_GET_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + case PR_GET_CFI: + if (arg2 != PR_CFI_BRANCH_LANDING_PADS) return -EINVAL; - error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg2); - break; - case PR_SET_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + if (arg4 || arg5) return -EINVAL; - error = arch_prctl_set_branch_landing_pad_state(me, arg2); + error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg3); break; - case PR_LOCK_INDIR_BR_LP_STATUS: - if (arg2 || arg3 || arg4 || arg5) + case PR_SET_CFI: + if (arg2 != PR_CFI_BRANCH_LANDING_PADS) return -EINVAL; - error = arch_prctl_lock_branch_landing_pad_state(me); + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_set_branch_landing_pad_state(me, arg3); + if (error) + break; + if (arg3 & PR_CFI_LOCK && !(arg3 & PR_CFI_DISABLE)) + error = arch_prctl_lock_branch_landing_pad_state(me); break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 55b0446fff9d..560f99bc4782 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -397,30 +397,24 @@ struct prctl_mm_map { # define PR_RSEQ_SLICE_EXT_ENABLE 0x01 /* - * Get the current indirect branch tracking configuration for the current - * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + * Get or set the control flow integrity (CFI) configuration for the + * current thread. + * + * Some per-thread control flow integrity settings are not yet + * controlled through this prctl(); see for example + * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS */ -#define PR_GET_INDIR_BR_LP_STATUS 80 - +#define PR_GET_CFI 80 +#define PR_SET_CFI 81 /* - * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will - * enable cpu feature for user thread, to track all indirect branches and ensure - * they land on arch defined landing pad instruction. - * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. - * arch64 - If enabled, an indirect branch must land on a BTI instruction. - * riscv - If enabled, an indirect branch must land on an lpad instruction. - * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect - * branches will no more be tracked by cpu to land on arch defined landing pad - * instruction. + * Forward-edge CFI variants (excluding ARM64 BTI, which has its own + * prctl()s). */ -#define PR_SET_INDIR_BR_LP_STATUS 81 -# define PR_INDIR_BR_LP_ENABLE (1UL << 0) +#define PR_CFI_BRANCH_LANDING_PADS 0 +/* Return and control values for PR_{GET,SET}_CFI */ +# define PR_CFI_ENABLE _BITUL(0) +# define PR_CFI_DISABLE _BITUL(1) +# define PR_CFI_LOCK _BITUL(2) -/* - * Prevent further changes to the specified indirect branch tracking - * configuration. All bits may be locked via this call, including - * undefined bits. - */ -#define PR_LOCK_INDIR_BR_LP_STATUS 82 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/testing/selftests/riscv/cfi/cfitests.c b/tools/testing/selftests/riscv/cfi/cfitests.c index 0dac74b8553c..39d097b6881f 100644 --- a/tools/testing/selftests/riscv/cfi/cfitests.c +++ b/tools/testing/selftests/riscv/cfi/cfitests.c @@ -146,11 +146,11 @@ int main(int argc, char *argv[]) * pads for user mode except lighting up a bit in senvcfg via a prctl. * Enable landing pad support throughout the execution of the test binary. */ - ret = my_syscall5(__NR_prctl, PR_GET_INDIR_BR_LP_STATUS, &lpad_status, 0, 0, 0); + ret = my_syscall5(__NR_prctl, PR_GET_CFI, PR_CFI_BRANCH_LANDING_PADS, &lpad_status, 0, 0); if (ret) ksft_exit_fail_msg("Get landing pad status failed with %d\n", ret); - if (!(lpad_status & PR_INDIR_BR_LP_ENABLE)) + if (!(lpad_status & PR_CFI_ENABLE)) ksft_exit_fail_msg("Landing pad is not enabled, should be enabled via glibc\n"); ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0); -- cgit v1.2.3