From 98b9f207afa53aff2edb0e52910c4348b456b37d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 22 Dec 2025 09:04:13 +0100 Subject: dmaengine: idxd: uapi: use UAPI types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. Use the fixed-width integer types provided by the UAPI headers instead. Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20251222-uapi-idxd-v1-1-baa183adb20d@linutronix.de Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 270 +++++++++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 137 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 3d1987e1bb2d..fdcc8eefb925 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -3,11 +3,7 @@ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ -#ifdef __KERNEL__ #include -#else -#include -#endif /* Driver command error status */ enum idxd_scmd_stat { @@ -176,132 +172,132 @@ enum iax_completion_status { #define DSA_COMP_STATUS(status) ((status) & DSA_COMP_STATUS_MASK) struct dsa_hw_desc { - uint32_t pasid:20; - uint32_t rsvd:11; - uint32_t priv:1; - uint32_t flags:24; - uint32_t opcode:8; - uint64_t completion_addr; + __u32 pasid:20; + __u32 rsvd:11; + __u32 priv:1; + __u32 flags:24; + __u32 opcode:8; + __u64 completion_addr; union { - uint64_t src_addr; - uint64_t rdback_addr; - uint64_t pattern; - uint64_t desc_list_addr; - uint64_t pattern_lower; - uint64_t transl_fetch_addr; + __u64 src_addr; + __u64 rdback_addr; + __u64 pattern; + __u64 desc_list_addr; + __u64 pattern_lower; + __u64 transl_fetch_addr; }; union { - uint64_t dst_addr; - uint64_t rdback_addr2; - uint64_t src2_addr; - uint64_t comp_pattern; + __u64 dst_addr; + __u64 rdback_addr2; + __u64 src2_addr; + __u64 comp_pattern; }; union { - uint32_t xfer_size; - uint32_t desc_count; - uint32_t region_size; + __u32 xfer_size; + __u32 desc_count; + __u32 region_size; }; - uint16_t int_handle; - uint16_t rsvd1; + __u16 int_handle; + __u16 rsvd1; union { - uint8_t expected_res; + __u8 expected_res; /* create delta record */ struct { - uint64_t delta_addr; - uint32_t max_delta_size; - uint32_t delt_rsvd; - uint8_t expected_res_mask; + __u64 delta_addr; + __u32 max_delta_size; + __u32 delt_rsvd; + __u8 expected_res_mask; }; - uint32_t delta_rec_size; - uint64_t dest2; + __u32 delta_rec_size; + __u64 dest2; /* CRC */ struct { - uint32_t crc_seed; - uint32_t crc_rsvd; - uint64_t seed_addr; + __u32 crc_seed; + __u32 crc_rsvd; + __u64 seed_addr; }; /* DIF check or strip */ struct { - uint8_t src_dif_flags; - uint8_t dif_chk_res; - uint8_t dif_chk_flags; - uint8_t dif_chk_res2[5]; - uint32_t chk_ref_tag_seed; - uint16_t chk_app_tag_mask; - uint16_t chk_app_tag_seed; + __u8 src_dif_flags; + __u8 dif_chk_res; + __u8 dif_chk_flags; + __u8 dif_chk_res2[5]; + __u32 chk_ref_tag_seed; + __u16 chk_app_tag_mask; + __u16 chk_app_tag_seed; }; /* DIF insert */ struct { - uint8_t dif_ins_res; - uint8_t dest_dif_flag; - uint8_t dif_ins_flags; - uint8_t dif_ins_res2[13]; - uint32_t ins_ref_tag_seed; - uint16_t ins_app_tag_mask; - uint16_t ins_app_tag_seed; + __u8 dif_ins_res; + __u8 dest_dif_flag; + __u8 dif_ins_flags; + __u8 dif_ins_res2[13]; + __u32 ins_ref_tag_seed; + __u16 ins_app_tag_mask; + __u16 ins_app_tag_seed; }; /* DIF update */ struct { - uint8_t src_upd_flags; - uint8_t upd_dest_flags; - uint8_t dif_upd_flags; - uint8_t dif_upd_res[5]; - uint32_t src_ref_tag_seed; - uint16_t src_app_tag_mask; - uint16_t src_app_tag_seed; - uint32_t dest_ref_tag_seed; - uint16_t dest_app_tag_mask; - uint16_t dest_app_tag_seed; + __u8 src_upd_flags; + __u8 upd_dest_flags; + __u8 dif_upd_flags; + __u8 dif_upd_res[5]; + __u32 src_ref_tag_seed; + __u16 src_app_tag_mask; + __u16 src_app_tag_seed; + __u32 dest_ref_tag_seed; + __u16 dest_app_tag_mask; + __u16 dest_app_tag_seed; }; /* Fill */ - uint64_t pattern_upper; + __u64 pattern_upper; /* Translation fetch */ struct { - uint64_t transl_fetch_res; - uint32_t region_stride; + __u64 transl_fetch_res; + __u32 region_stride; }; /* DIX generate */ struct { - uint8_t dix_gen_res; - uint8_t dest_dif_flags; - uint8_t dif_flags; - uint8_t dix_gen_res2[13]; - uint32_t ref_tag_seed; - uint16_t app_tag_mask; - uint16_t app_tag_seed; + __u8 dix_gen_res; + __u8 dest_dif_flags; + __u8 dif_flags; + __u8 dix_gen_res2[13]; + __u32 ref_tag_seed; + __u16 app_tag_mask; + __u16 app_tag_seed; }; - uint8_t op_specific[24]; + __u8 op_specific[24]; }; } __attribute__((packed)); struct iax_hw_desc { - uint32_t pasid:20; - uint32_t rsvd:11; - uint32_t priv:1; - uint32_t flags:24; - uint32_t opcode:8; - uint64_t completion_addr; - uint64_t src1_addr; - uint64_t dst_addr; - uint32_t src1_size; - uint16_t int_handle; + __u32 pasid:20; + __u32 rsvd:11; + __u32 priv:1; + __u32 flags:24; + __u32 opcode:8; + __u64 completion_addr; + __u64 src1_addr; + __u64 dst_addr; + __u32 src1_size; + __u16 int_handle; union { - uint16_t compr_flags; - uint16_t decompr_flags; + __u16 compr_flags; + __u16 decompr_flags; }; - uint64_t src2_addr; - uint32_t max_dst_size; - uint32_t src2_size; - uint32_t filter_flags; - uint32_t num_inputs; + __u64 src2_addr; + __u32 max_dst_size; + __u32 src2_size; + __u32 filter_flags; + __u32 num_inputs; } __attribute__((packed)); struct dsa_raw_desc { - uint64_t field[8]; + __u64 field[8]; } __attribute__((packed)); /* @@ -309,91 +305,91 @@ struct dsa_raw_desc { * volatile and prevent the compiler from optimize the read. */ struct dsa_completion_record { - volatile uint8_t status; + volatile __u8 status; union { - uint8_t result; - uint8_t dif_status; + __u8 result; + __u8 dif_status; }; - uint8_t fault_info; - uint8_t rsvd; + __u8 fault_info; + __u8 rsvd; union { - uint32_t bytes_completed; - uint32_t descs_completed; + __u32 bytes_completed; + __u32 descs_completed; }; - uint64_t fault_addr; + __u64 fault_addr; union { /* common record */ struct { - uint32_t invalid_flags:24; - uint32_t rsvd2:8; + __u32 invalid_flags:24; + __u32 rsvd2:8; }; - uint32_t delta_rec_size; - uint64_t crc_val; + __u32 delta_rec_size; + __u64 crc_val; /* DIF check & strip */ struct { - uint32_t dif_chk_ref_tag; - uint16_t dif_chk_app_tag_mask; - uint16_t dif_chk_app_tag; + __u32 dif_chk_ref_tag; + __u16 dif_chk_app_tag_mask; + __u16 dif_chk_app_tag; }; /* DIF insert */ struct { - uint64_t dif_ins_res; - uint32_t dif_ins_ref_tag; - uint16_t dif_ins_app_tag_mask; - uint16_t dif_ins_app_tag; + __u64 dif_ins_res; + __u32 dif_ins_ref_tag; + __u16 dif_ins_app_tag_mask; + __u16 dif_ins_app_tag; }; /* DIF update */ struct { - uint32_t dif_upd_src_ref_tag; - uint16_t dif_upd_src_app_tag_mask; - uint16_t dif_upd_src_app_tag; - uint32_t dif_upd_dest_ref_tag; - uint16_t dif_upd_dest_app_tag_mask; - uint16_t dif_upd_dest_app_tag; + __u32 dif_upd_src_ref_tag; + __u16 dif_upd_src_app_tag_mask; + __u16 dif_upd_src_app_tag; + __u32 dif_upd_dest_ref_tag; + __u16 dif_upd_dest_app_tag_mask; + __u16 dif_upd_dest_app_tag; }; /* DIX generate */ struct { - uint64_t dix_gen_res; - uint32_t dix_ref_tag; - uint16_t dix_app_tag_mask; - uint16_t dix_app_tag; + __u64 dix_gen_res; + __u32 dix_ref_tag; + __u16 dix_app_tag_mask; + __u16 dix_app_tag; }; - uint8_t op_specific[16]; + __u8 op_specific[16]; }; } __attribute__((packed)); struct dsa_raw_completion_record { - uint64_t field[4]; + __u64 field[4]; } __attribute__((packed)); struct iax_completion_record { - volatile uint8_t status; - uint8_t error_code; - uint8_t fault_info; - uint8_t rsvd; - uint32_t bytes_completed; - uint64_t fault_addr; - uint32_t invalid_flags; - uint32_t rsvd2; - uint32_t output_size; - uint8_t output_bits; - uint8_t rsvd3; - uint16_t xor_csum; - uint32_t crc; - uint32_t min; - uint32_t max; - uint32_t sum; - uint64_t rsvd4[2]; + volatile __u8 status; + __u8 error_code; + __u8 fault_info; + __u8 rsvd; + __u32 bytes_completed; + __u64 fault_addr; + __u32 invalid_flags; + __u32 rsvd2; + __u32 output_size; + __u8 output_bits; + __u8 rsvd3; + __u16 xor_csum; + __u32 crc; + __u32 min; + __u32 max; + __u32 sum; + __u64 rsvd4[2]; } __attribute__((packed)); struct iax_raw_completion_record { - uint64_t field[8]; + __u64 field[8]; } __attribute__((packed)); #endif -- cgit v1.2.3 From 40fc797ba18328e57ed1cb213b4b5e48f86f4c7c Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 15 Dec 2025 18:17:09 +0000 Subject: binder: fix trivial typo in uapi header As reported by codespell: include/uapi/linux/android/binder.h:281: interupted ==> interrupted Signed-off-by: Carlos Llamas Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251215181724.3811977-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 03ee4c7010d7..701cad36de43 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -278,7 +278,7 @@ enum { * NOTE: Two special error codes you should check for when calling * in to the driver are: * - * EINTR -- The operation has been interupted. This should be + * EINTR -- The operation has been interrupted. This should be * handled by retrying the ioctl() until a different error code * is returned. * -- cgit v1.2.3 From da142f3d373a6ddaca0119615a8db2175ddc4121 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 5 Dec 2025 15:26:55 -0800 Subject: KVM: Remove subtle "struct kvm_stats_desc" pseudo-overlay Remove KVM's internal pseudo-overlay of kvm_stats_desc, which subtly aliases the flexible name[] in the uAPI definition with a fixed-size array of the same name. The unusual embedded structure results in compiler warnings due to -Wflex-array-member-not-at-end, and also necessitates an extra level of dereferencing in KVM. To avoid the "overlay", define the uAPI structure to have a fixed-size name when building for the kernel. Opportunistically clean up the indentation for the stats macros, and replace spaces with tabs. No functional change intended. Reported-by: Gustavo A. R. Silva Closes: https://lore.kernel.org/all/aPfNKRpLfhmhYqfP@kspp Acked-by: Marc Zyngier Acked-by: Christian Borntraeger [..] Acked-by: Anup Patel Reviewed-by: Bibo Mao Acked-by: Gustavo A. R. Silva Link: https://patch.msgid.link/20251205232655.445294-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/arm64/kvm/guest.c | 4 +-- arch/loongarch/kvm/vcpu.c | 2 +- arch/loongarch/kvm/vm.c | 2 +- arch/mips/kvm/mips.c | 4 +-- arch/powerpc/kvm/book3s.c | 4 +-- arch/powerpc/kvm/booke.c | 4 +-- arch/riscv/kvm/vcpu.c | 2 +- arch/riscv/kvm/vm.c | 2 +- arch/s390/kvm/kvm-s390.c | 4 +-- arch/x86/kvm/x86.c | 4 +-- include/linux/kvm_host.h | 83 ++++++++++++++++++++--------------------------- include/uapi/linux/kvm.h | 8 +++++ virt/kvm/binary_stats.c | 2 +- virt/kvm/kvm_main.c | 20 ++++++------ 14 files changed, 70 insertions(+), 75 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1c87699fd886..332c453b87cf 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -29,7 +29,7 @@ #include "trace.h" -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -42,7 +42,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, hvc_exit_stat), STATS_DESC_COUNTER(VCPU, wfe_exit_stat), diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 656b954c1134..b3547bd41d21 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -14,7 +14,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, int_exits), STATS_DESC_COUNTER(VCPU, idle_exits), diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 194ccbcdc3b3..7deff56e0e1a 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -10,7 +10,7 @@ #include #include -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, pages), STATS_DESC_ICOUNTER(VM, hugepages), diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b0fb92fda4d4..23e69baad453 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -38,7 +38,7 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -51,7 +51,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, wait_exits), STATS_DESC_COUNTER(VCPU, cache_exits), diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d79c5d1098c0..2efbe05caed7 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -38,7 +38,7 @@ /* #define EXIT_DEBUG */ -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) @@ -53,7 +53,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, sum_exits), STATS_DESC_COUNTER(VCPU, mmio_exits), diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3401b96be475..f3ddb24ece74 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -36,7 +36,7 @@ unsigned long kvmppc_booke_handlers; -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) @@ -51,7 +51,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, sum_exits), STATS_DESC_COUNTER(VCPU, mmio_exits), diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index a55a95da54d0..fdd99ac1e714 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -24,7 +24,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, ecall_exit_stat), STATS_DESC_COUNTER(VCPU, wfi_exit_stat), diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 66d91ae6e9b2..715a06ae8c13 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -13,7 +13,7 @@ #include #include -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 56a50524b3ee..495141bf0398 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -64,7 +64,7 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, inject_io), STATS_DESC_COUNTER(VM, inject_float_mchk), @@ -90,7 +90,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, exit_userspace), STATS_DESC_COUNTER(VCPU, exit_null), diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ff8812f3a129..69e0a033e4ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -237,7 +237,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv); bool __read_mostly enable_device_posted_irqs = true; EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs); -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, mmu_shadow_zapped), STATS_DESC_COUNTER(VM, mmu_pte_write), @@ -263,7 +263,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, pf_taken), STATS_DESC_COUNTER(VCPU, pf_fixed), diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d93f75b05ae2..7428d9949382 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1927,56 +1927,43 @@ enum kvm_stat_kind { struct kvm_stat_data { struct kvm *kvm; - const struct _kvm_stats_desc *desc; + const struct kvm_stats_desc *desc; enum kvm_stat_kind kind; }; -struct _kvm_stats_desc { - struct kvm_stats_desc desc; - char name[KVM_STATS_NAME_SIZE]; -}; - -#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ - .flags = type | unit | base | \ - BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ - BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ - BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ - .exponent = exp, \ - .size = sz, \ +#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ + .flags = type | unit | base | \ + BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ + BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ + BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ + .exponent = exp, \ + .size = sz, \ .bucket_size = bsz -#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, stat) \ - }, \ - .name = #stat, \ - } +#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, generic.stat), \ + .name = #stat, \ +} +#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, generic.stat), \ + .name = #stat, \ +} +#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, stat), \ + .name = #stat, \ +} +#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, stat), \ + .name = #stat, \ +} /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) @@ -2053,7 +2040,7 @@ struct _kvm_stats_desc { STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, - const struct _kvm_stats_desc *desc, + const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset); @@ -2098,9 +2085,9 @@ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value) extern const struct kvm_stats_header kvm_vm_stats_header; -extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; +extern const struct kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; -extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; +extern const struct kvm_stats_desc kvm_vcpu_stats_desc[]; #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dddb781b0507..76bd54848b11 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -14,6 +14,10 @@ #include #include +#ifdef __KERNEL__ +#include +#endif + #define KVM_API_VERSION 12 /* @@ -1579,7 +1583,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c index eefca6c69f51..76ce697c773b 100644 --- a/virt/kvm/binary_stats.c +++ b/virt/kvm/binary_stats.c @@ -50,7 +50,7 @@ * Return: the number of bytes that has been successfully read */ ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, - const struct _kvm_stats_desc *desc, + const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b5b69c97665..cf65fd82d36d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -983,9 +983,9 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) kvm_free_memslot(kvm, memslot); } -static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc) +static umode_t kvm_stats_debugfs_mode(const struct kvm_stats_desc *desc) { - switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { + switch (desc->flags & KVM_STATS_TYPE_MASK) { case KVM_STATS_TYPE_INSTANT: return 0444; case KVM_STATS_TYPE_CUMULATIVE: @@ -1020,7 +1020,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; - const struct _kvm_stats_desc *pdesc; + const struct kvm_stats_desc *pdesc; int i, ret = -ENOMEM; int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; @@ -6186,11 +6186,11 @@ static int kvm_stat_data_get(void *data, u64 *val) switch (stat_data->kind) { case KVM_STAT_VM: r = kvm_get_stat_per_vm(stat_data->kvm, - stat_data->desc->desc.offset, val); + stat_data->desc->offset, val); break; case KVM_STAT_VCPU: r = kvm_get_stat_per_vcpu(stat_data->kvm, - stat_data->desc->desc.offset, val); + stat_data->desc->offset, val); break; } @@ -6208,11 +6208,11 @@ static int kvm_stat_data_clear(void *data, u64 val) switch (stat_data->kind) { case KVM_STAT_VM: r = kvm_clear_stat_per_vm(stat_data->kvm, - stat_data->desc->desc.offset); + stat_data->desc->offset); break; case KVM_STAT_VCPU: r = kvm_clear_stat_per_vcpu(stat_data->kvm, - stat_data->desc->desc.offset); + stat_data->desc->offset); break; } @@ -6360,7 +6360,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) static void kvm_init_debug(void) { const struct file_operations *fops; - const struct _kvm_stats_desc *pdesc; + const struct kvm_stats_desc *pdesc; int i; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); @@ -6373,7 +6373,7 @@ static void kvm_init_debug(void) fops = &vm_stat_readonly_fops; debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), kvm_debugfs_dir, - (void *)(long)pdesc->desc.offset, fops); + (void *)(long)pdesc->offset, fops); } for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) { @@ -6384,7 +6384,7 @@ static void kvm_init_debug(void) fops = &vcpu_stat_readonly_fops; debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), kvm_debugfs_dir, - (void *)(long)pdesc->desc.offset, fops); + (void *)(long)pdesc->offset, fops); } } -- cgit v1.2.3 From 0e6b7eae1fded85f94a357d6132f07d64c614cfa Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 26 Jan 2026 12:56:57 +0100 Subject: fs: add FS_XFLAG_VERITY for fs-verity files fs-verity introduced inode flag for inodes with enabled fs-verity on them. This patch adds FS_XFLAG_VERITY file attribute which can be retrieved with FS_IOC_FSGETXATTR ioctl() and file_getattr() syscall. This flag is read-only and can not be set with corresponding set ioctl() and file_setattr(). The FS_IOC_SETFLAGS requires file to be opened for writing which is not allowed for verity files. The FS_IOC_FSSETXATTR and file_setattr() clears this flag from the user input. As this is now common flag for both flag interfaces (flags/xflags) add it to overlapping flags list to exclude it from overwrite. Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260126115658.27656-2-aalbersh@kernel.org Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- Documentation/filesystems/fsverity.rst | 16 ++++++++++++++++ fs/file_attr.c | 4 ++++ include/linux/fileattr.h | 6 +++--- include/uapi/linux/fs.h | 1 + 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst index 412cf11e3298..22b49b295d1f 100644 --- a/Documentation/filesystems/fsverity.rst +++ b/Documentation/filesystems/fsverity.rst @@ -341,6 +341,22 @@ the file has fs-verity enabled. This can perform better than FS_IOC_GETFLAGS and FS_IOC_MEASURE_VERITY because it doesn't require opening the file, and opening verity files can be expensive. +FS_IOC_FSGETXATTR +----------------- + +Since Linux v7.0, the FS_IOC_FSGETXATTR ioctl sets FS_XFLAG_VERITY (0x00020000) +in the returned flags when the file has verity enabled. Note that this attribute +cannot be set with FS_IOC_FSSETXATTR as enabling verity requires input +parameters. See FS_IOC_ENABLE_VERITY. + +file_getattr +------------ + +Since Linux v7.0, the file_getattr() syscall sets FS_XFLAG_VERITY (0x00020000) +in the returned flags when the file has verity enabled. Note that this attribute +cannot be set with file_setattr() as enabling verity requires input parameters. +See FS_IOC_ENABLE_VERITY. + .. _accessing_verity_files: Accessing verity files diff --git a/fs/file_attr.c b/fs/file_attr.c index f3704881c126..dfde87401817 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -36,6 +36,8 @@ void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags) fa->flags |= FS_DAX_FL; if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) fa->flags |= FS_PROJINHERIT_FL; + if (fa->fsx_xflags & FS_XFLAG_VERITY) + fa->flags |= FS_VERITY_FL; } EXPORT_SYMBOL(fileattr_fill_xflags); @@ -66,6 +68,8 @@ void fileattr_fill_flags(struct file_kattr *fa, u32 flags) fa->fsx_xflags |= FS_XFLAG_DAX; if (fa->flags & FS_PROJINHERIT_FL) fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + if (fa->flags & FS_VERITY_FL) + fa->fsx_xflags |= FS_XFLAG_VERITY; } EXPORT_SYMBOL(fileattr_fill_flags); diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index f89dcfad3f8f..3780904a63a6 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -7,16 +7,16 @@ #define FS_COMMON_FL \ (FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NODUMP_FL | FS_NOATIME_FL | FS_DAX_FL | \ - FS_PROJINHERIT_FL) + FS_PROJINHERIT_FL | FS_VERITY_FL) #define FS_XFLAG_COMMON \ (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \ FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \ - FS_XFLAG_PROJINHERIT) + FS_XFLAG_PROJINHERIT | FS_XFLAG_VERITY) /* Read-only inode flags */ #define FS_XFLAG_RDONLY_MASK \ - (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR) + (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY) /* Flags to indicate valid value of fsx_ fields */ #define FS_XFLAG_VALUES_MASK \ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 66ca526cf786..70b2b661f42c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -253,6 +253,7 @@ struct file_attr { #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is -- cgit v1.2.3 From c29214677a9fc1a3a4ee65e189afeb5fd10d676f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 15 Feb 2026 21:34:28 +0000 Subject: io_uring/query: return support for custom rx page size Add an ability to query if the zcrx rx page size setting is available. Note, even when the API is supported by io_uring, the registration can still get rejected for various reasons, e.g. when the NIC or the driver doesn't support it, when the particular specified size is unsupported, when the memory area doesn't satisfy all requirements, etc. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ include/uapi/linux/io_uring/query.h | 3 ++- io_uring/query.c | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index da5156954731..c462bdf3c42c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1090,6 +1090,14 @@ enum zcrx_reg_flags { ZCRX_REG_IMPORT = 1, }; +enum zcrx_features { + /* + * The user can ask for the desired rx page size by passing the + * value in struct io_uring_zcrx_ifq_reg::rx_buf_len. + */ + ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0, +}; + /* * Argument for IORING_REGISTER_ZCRX_IFQ */ diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 2456e6c5ebb5..0b6248175e26 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -50,7 +50,8 @@ struct io_uring_query_zcrx { __u64 area_flags; /* The number of supported ZCRX_CTRL_* opcodes */ __u32 nr_ctrl_opcodes; - __u32 __resv1; + /* Bitmask of ZCRX_FEATURE_* indicating which features are available */ + __u32 features; /* The refill ring header size */ __u32 rq_hdr_size; /* The alignment for the header */ diff --git a/io_uring/query.c b/io_uring/query.c index abdd6f3e1223..63cc30c9803d 100644 --- a/io_uring/query.c +++ b/io_uring/query.c @@ -39,7 +39,7 @@ static ssize_t io_query_zcrx(union io_query_data *data) e->nr_ctrl_opcodes = __ZCRX_CTRL_LAST; e->rq_hdr_size = sizeof(struct io_uring); e->rq_hdr_alignment = L1_CACHE_BYTES; - e->__resv1 = 0; + e->features = ZCRX_FEATURE_RX_PAGE_SIZE; e->__resv2 = 0; return sizeof(*e); } -- cgit v1.2.3 From 6b34f8edf8b807b7f87901623aa52dfa1b29ef93 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 15 Feb 2026 21:38:09 +0000 Subject: io_uring/query: add query.h copyright notice Add a copyright notice to io_uring's query uapi header. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/query.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 0b6248175e26..95500759cc13 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ /* * Header file for the io_uring query interface. + * + * Copyright (C) 2026 Pavel Begunkov + * Copyright (C) Meta Platforms, Inc. */ #ifndef LINUX_IO_URING_QUERY_H #define LINUX_IO_URING_QUERY_H -- cgit v1.2.3 From be3573124e630736d2d39650b12f5ef220b47ac1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Feb 2026 10:00:44 -0700 Subject: io_uring/bpf_filter: pass in expected filter payload size It's quite possible that opcodes that have payloads attached to them, like IORING_OP_OPENAT/OPENAT2 or IORING_OP_SOCKET, that these paylods can change over time. For example, on the openat/openat2 side, the struct open_how argument is extensible, and could be extended in the future to allow further arguments to be passed in. Allow registration of a cBPF filter to give the size of the filter as seen by userspace. If that filter is for an opcode that takes extra payload data, allow it if the application payload expectation is the same size than the kernels. If that is the case, the kernel supports filtering on the payload that the application expects. If the size differs, the behavior depends on the IO_URING_BPF_FILTER_SZ_STRICT flag: 1) If IO_URING_BPF_FILTER_SZ_STRICT is set and the size expectation differs, fail the attempt to load the filter. 2) If IO_URING_BPF_FILTER_SZ_STRICT isn't set, allow the filter if the userspace pdu size is smaller than what the kernel offers. 3) Regardless if IO_URING_BPF_FILTER_SZ_STRICT, fail loading the filter if the userspace pdu size is bigger than what the kernel supports. An attempt to load a filter due to sizing will error with -EMSGSIZE. For that error, the registration struct will have filter->pdu_size populated with the pdu size that the kernel uses. Reported-by: Christian Brauner Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/bpf_filter.h | 8 +++- io_uring/bpf_filter.c | 65 ++++++++++++++++++++++++-------- 2 files changed, 56 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h index 220351b81bc0..1b461d792a7b 100644 --- a/include/uapi/linux/io_uring/bpf_filter.h +++ b/include/uapi/linux/io_uring/bpf_filter.h @@ -35,13 +35,19 @@ enum { * If set, any currently unset opcode will have a deny filter attached */ IO_URING_BPF_FILTER_DENY_REST = 1, + /* + * If set, if kernel and application don't agree on pdu_size for + * the given opcode, fail the registration of the filter. + */ + IO_URING_BPF_FILTER_SZ_STRICT = 2, }; struct io_uring_bpf_filter { __u32 opcode; /* io_uring opcode to filter */ __u32 flags; __u32 filter_len; /* number of BPF instructions */ - __u32 resv; + __u8 pdu_size; /* expected pdu size for opcode */ + __u8 resv[3]; __u64 filter_ptr; /* pointer to BPF filter */ __u64 resv2[5]; }; diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c index 8ac7d06de122..28a23e92ee81 100644 --- a/io_uring/bpf_filter.c +++ b/io_uring/bpf_filter.c @@ -308,36 +308,69 @@ err: return ERR_PTR(-EBUSY); } -#define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST +#define IO_URING_BPF_FILTER_FLAGS (IO_URING_BPF_FILTER_DENY_REST | \ + IO_URING_BPF_FILTER_SZ_STRICT) -int io_register_bpf_filter(struct io_restriction *res, - struct io_uring_bpf __user *arg) +static int io_bpf_filter_import(struct io_uring_bpf *reg, + struct io_uring_bpf __user *arg) { - struct io_bpf_filters *filters, *old_filters = NULL; - struct io_bpf_filter *filter, *old_filter; - struct io_uring_bpf reg; - struct bpf_prog *prog; - struct sock_fprog fprog; + const struct io_issue_def *def; int ret; - if (copy_from_user(®, arg, sizeof(reg))) + if (copy_from_user(reg, arg, sizeof(*reg))) return -EFAULT; - if (reg.cmd_type != IO_URING_BPF_CMD_FILTER) + if (reg->cmd_type != IO_URING_BPF_CMD_FILTER) return -EINVAL; - if (reg.cmd_flags || reg.resv) + if (reg->cmd_flags || reg->resv) return -EINVAL; - if (reg.filter.opcode >= IORING_OP_LAST) + if (reg->filter.opcode >= IORING_OP_LAST) return -EINVAL; - if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS) + if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS) return -EINVAL; - if (reg.filter.resv) + if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv))) return -EINVAL; - if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2))) + if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2))) return -EINVAL; - if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS) + if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS) return -EINVAL; + /* Verify filter size */ + def = &io_issue_defs[array_index_nospec(reg->filter.opcode, IORING_OP_LAST)]; + + /* same size, always ok */ + ret = 0; + if (reg->filter.pdu_size == def->filter_pdu_size) + ; + /* size differs, fail in strict mode */ + else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT) + ret = -EMSGSIZE; + /* userspace filter is bigger, always disallow */ + else if (reg->filter.pdu_size > def->filter_pdu_size) + ret = -EMSGSIZE; + + /* copy back kernel filter size */ + reg->filter.pdu_size = def->filter_pdu_size; + if (copy_to_user(&arg->filter, ®->filter, sizeof(reg->filter))) + return -EFAULT; + + return ret; +} + +int io_register_bpf_filter(struct io_restriction *res, + struct io_uring_bpf __user *arg) +{ + struct io_bpf_filters *filters, *old_filters = NULL; + struct io_bpf_filter *filter, *old_filter; + struct io_uring_bpf reg; + struct bpf_prog *prog; + struct sock_fprog fprog; + int ret; + + ret = io_bpf_filter_import(®, arg); + if (ret) + return ret; + fprog.len = reg.filter.filter_len; fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr); -- cgit v1.2.3 From 4edd4ba71ce0df015303dba75ea9d20d1a217546 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 14 Feb 2026 15:54:06 +0100 Subject: include: uapi: netfilter_bridge.h: Cover for musl libc Musl defines its own struct ethhdr and thus defines __UAPI_DEF_ETHHDR to zero. To avoid struct redefinition errors, user space is therefore supposed to include netinet/if_ether.h before (or instead of) linux/if_ether.h. To relieve them from this burden, include the libc header here if not building for kernel space. Reported-by: Alyssa Ross Suggested-by: Florian Westphal Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter_bridge.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h index f6e8d1e05c97..758de72b2764 100644 --- a/include/uapi/linux/netfilter_bridge.h +++ b/include/uapi/linux/netfilter_bridge.h @@ -5,6 +5,10 @@ /* bridge-specific defines for netfilter. */ +#ifndef __KERNEL__ +#include /* for __UAPI_DEF_ETHHDR if defined */ +#endif + #include #include #include -- cgit v1.2.3 From a284dbc96a47891a7a595a1c81b1e2da4d309cf6 Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Wed, 18 Feb 2026 14:47:59 +0000 Subject: mshv: Add nested virtualization creation flag Introduce HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE to indicate support for nested virtualization during partition creation. This enables clearer configuration and capability checks for nested virtualization scenarios. Signed-off-by: Stanislav Kinsburskii Signed-off-by: Muminul Islam Signed-off-by: Wei Liu --- drivers/hv/mshv_root_main.c | 2 ++ include/hyperv/hvhdk.h | 1 + include/uapi/linux/mshv.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index e5d94398528e..e490f8e5a8a5 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -1947,6 +1947,8 @@ static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags, *pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE; if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES)) *pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED; + if (args.pt_flags & BIT(MSHV_PT_BIT_NESTED_VIRTUALIZATION)) + *pt_flags |= HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE; isol_props->as_uint64 = 0; diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 79d1f16a850a..f139c7c5bb2d 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -335,6 +335,7 @@ union hv_partition_isolation_properties { #define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2 /* Note: Exo partition is enabled by default */ +#define HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE BIT(1) #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4) #define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) #define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13) diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index dee3ece28ce5..7ef5dd67a232 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -27,6 +27,7 @@ enum { MSHV_PT_BIT_X2APIC, MSHV_PT_BIT_GPA_SUPER_PAGES, MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, + MSHV_PT_BIT_NESTED_VIRTUALIZATION, MSHV_PT_BIT_COUNT, }; -- cgit v1.2.3 From 8927a108a7662eb83eb667bc0c5a0633397122b1 Mon Sep 17 00:00:00 2001 From: Anatol Belski Date: Wed, 18 Feb 2026 14:48:02 +0000 Subject: mshv: Add SMT_ENABLED_GUEST partition creation flag Add support for HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST to allow userspace VMMs to enable SMT for guest partitions. Expose this via new MSHV_PT_BIT_SMT_ENABLED_GUEST flag in the UAPI. Without this flag, the hypervisor schedules guest VPs incorrectly, causing SMT unusable. Signed-off-by: Anatol Belski Signed-off-by: Wei Liu --- drivers/hv/mshv_root_main.c | 2 ++ include/hyperv/hvhdk.h | 1 + include/uapi/linux/mshv.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index e490f8e5a8a5..192467a25f66 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -1949,6 +1949,8 @@ static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags, *pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED; if (args.pt_flags & BIT(MSHV_PT_BIT_NESTED_VIRTUALIZATION)) *pt_flags |= HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE; + if (args.pt_flags & BIT(MSHV_PT_BIT_SMT_ENABLED_GUEST)) + *pt_flags |= HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST; isol_props->as_uint64 = 0; diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index f139c7c5bb2d..245f3db53bf1 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -335,6 +335,7 @@ union hv_partition_isolation_properties { #define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2 /* Note: Exo partition is enabled by default */ +#define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0) #define HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE BIT(1) #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4) #define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 7ef5dd67a232..e0645a34b55b 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -28,6 +28,7 @@ enum { MSHV_PT_BIT_GPA_SUPER_PAGES, MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, MSHV_PT_BIT_NESTED_VIRTUALIZATION, + MSHV_PT_BIT_SMT_ENABLED_GUEST, MSHV_PT_BIT_COUNT, }; -- cgit v1.2.3 From 3b68df978133ac3d46d570af065a73debbb68248 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 20 Feb 2026 15:06:41 -0500 Subject: rseq: slice ext: Ensure rseq feature size differs from original rseq size Before rseq became extensible, its original size was 32 bytes even though the active rseq area was only 20 bytes. This had the following impact in terms of userspace ecosystem evolution: * The GNU libc between 2.35 and 2.39 expose a __rseq_size symbol set to 32, even though the size of the active rseq area is really 20. * The GNU libc 2.40 changes this __rseq_size to 20, thus making it express the active rseq area. * Starting from glibc 2.41, __rseq_size corresponds to the AT_RSEQ_FEATURE_SIZE from getauxval(3). This means that users of __rseq_size can always expect it to correspond to the active rseq area, except for the value 32, for which the active rseq area is 20 bytes. Exposing a 32 bytes feature size would make life needlessly painful for userspace. Therefore, add a reserved field at the end of the rseq area to bump the feature size to 33 bytes. This reserved field is expected to be replaced with whatever field will come next, expecting that this field will be larger than 1 byte. The effect of this change is to increase the size from 32 to 64 bytes before we actually have fields using that memory. Clarify the allocation size and alignment requirements in the struct rseq uapi comment. Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the value of the active rseq area size rounded up to next power of 2, which guarantees that the rseq structure will always be aligned on the nearest power of two large enough to contain it, even as it grows. Change the alignment check in the rseq registration accordingly. This will minimize the amount of ABI corner-cases we need to document and require userspace to play games with. The rule stays simple when __rseq_size != 32: #define rseq_field_available(field) (__rseq_size >= offsetofend(struct rseq_abi, field)) Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com --- fs/binfmt_elf.c | 3 ++- include/linux/rseq.h | 12 ++++++++++++ include/uapi/linux/rseq.h | 26 ++++++++++++++++++++++---- kernel/rseq.c | 3 ++- 4 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8e89cc5b2820..fb857faaf0d6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } #ifdef CONFIG_RSEQ NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end)); - NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq)); + NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align()); #endif #undef NEW_AUX_ENT /* AT_NULL is zero; clear the rest too */ diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 7a01a0760405..b9d62fc2140d 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags) t->rseq = current->rseq; } +/* + * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq + * registration. This is the active rseq area size rounded up to next + * power of 2, which guarantees that the rseq structure will always be + * aligned on the nearest power of two large enough to contain it, even + * as it grows. + */ +static inline unsigned int rseq_alloc_align(void) +{ + return 1U << get_count_order(offsetof(struct rseq, end)); +} + #else /* CONFIG_RSEQ */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 863c4a00a66b..f69344fe6c08 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -87,10 +87,17 @@ struct rseq_slice_ctrl { }; /* - * struct rseq is aligned on 4 * 8 bytes to ensure it is always - * contained within a single cache-line. + * The original size and alignment of the allocation for struct rseq is + * 32 bytes. * - * A single struct rseq per thread is allowed. + * The allocation size needs to be greater or equal to + * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to + * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32). + * + * As an alternative, userspace is allowed to use both the original size + * and alignment of 32 bytes for backward compatibility. + * + * A single active struct rseq registration per thread is allowed. */ struct rseq { /* @@ -180,10 +187,21 @@ struct rseq { */ struct rseq_slice_ctrl slice_ctrl; + /* + * Before rseq became extensible, its original size was 32 bytes even + * though the active rseq area was only 20 bytes. + * Exposing a 32 bytes feature size would make life needlessly painful + * for userspace. Therefore, add a reserved byte after byte 32 + * to bump the rseq feature size from 32 to 33. + * The next field to be added to the rseq area will be larger + * than one byte, and will replace this reserved byte. + */ + __u8 __reserved; + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(32))); #endif /* _UAPI_LINUX_RSEQ_H */ diff --git a/kernel/rseq.c b/kernel/rseq.c index e349f86cc945..38d3ef540760 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -456,7 +457,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 */ if (rseq_len < ORIG_RSEQ_SIZE || (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || - (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || rseq_len < offsetof(struct rseq, end)))) return -EINVAL; if (!access_ok(rseq, rseq_len)) -- cgit v1.2.3 From 39195990e4c093c9eecf88f29811c6de29265214 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Feb 2026 06:10:08 -0600 Subject: PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fb82437fdd8c ("PCI: Change capability register offsets to hex") incorrectly converted the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value from decimal 52 to hex 0x32: -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ This broke PCI capabilities in a VMM because subsequent ones weren't DWORD-aligned. Change PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to the correct value of 0x34. fb82437fdd8c was from Baruch Siach , but this was not Baruch's fault; it's a mistake I made when applying the patch. Fixes: fb82437fdd8c ("PCI: Change capability register offsets to hex") Reported-by: David Woodhouse Closes: https://lore.kernel.org/all/3ae392a0158e9d9ab09a1d42150429dd8ca42791.camel@infradead.org Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/uapi/linux/pci_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ec1c54b5a310..14f634ab9350 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -712,7 +712,7 @@ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ #define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x34 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ -- cgit v1.2.3 From 0ed2e8bf61d6d5df1d78f4e24b682dff4c394e17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 28 Feb 2026 04:56:20 -0700 Subject: io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG Sync with a recent liburing fix, which corrects the comment explaining when the IORING_SETUP_TASKRUN_FLAG setup flag is valid to use. May be use with COOP_TASKRUN or DEFER_TASKRUN, not useful without either of this task_work mechanisms being used. Link: https://github.com/axboe/liburing/pull/1543 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6750c383a2ab..1ff16141c8a5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -188,7 +188,8 @@ enum io_uring_sqe_flags_bit { /* * If COOP_TASKRUN is set, get notified if task work is available for * running and a kernel transition would be needed to run it. This sets - * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN + * or DEFER_TASKRUN. */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ -- cgit v1.2.3 From a116bac87118903925108e57781bbfc7a7eea27b Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Mon, 2 Mar 2026 16:23:09 -0800 Subject: dma-buf: Include ioctl.h in UAPI header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include/uapi/linux/dma-buf.h uses several macros from ioctl.h to define its ioctl commands. However, it does not include ioctl.h itself. So, if userspace source code tries to include the dma-buf.h file without including ioctl.h, it can result in build failures. Therefore, include ioctl.h in the dma-buf UAPI header. Signed-off-by: Isaac J. Manjarres Reviewed-by: T.J. Mercier Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20260303002309.1401849-1-isaacmanjarres@google.com --- include/uapi/linux/dma-buf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 5a6fda66d9ad..e827c9d20c5d 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -20,6 +20,7 @@ #ifndef _DMA_BUF_UAPI_H_ #define _DMA_BUF_UAPI_H_ +#include #include /** -- cgit v1.2.3