From fc1a05de0058bc1dbbc202f6f6cdb25ee0bae16d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Jan 2026 11:48:49 +0100 Subject: tools: update mount.h header Update the mount.h header so we can rely on it in the selftests. Link: https://patch.msgid.link/20260122-work-fsmount-namespace-v1-4-5ef0a886e646@kernel.org Signed-off-by: Christian Brauner --- tools/include/uapi/linux/mount.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 7fa67c2031a5..2204708dbf7a 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -61,7 +61,8 @@ /* * open_tree() flags. */ -#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLONE (1 << 0) /* Clone the target tree and attach the clone */ +#define OPEN_TREE_NAMESPACE (1 << 1) /* Clone the target tree into a new mount namespace */ #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ /* @@ -109,6 +110,7 @@ enum fsconfig_command { * fsmount() flags. */ #define FSMOUNT_CLOEXEC 0x00000001 +#define FSMOUNT_NAMESPACE 0x00000002 /* Create the mount in a new mount namespace */ /* * Mount attributes. @@ -197,7 +199,10 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 spare; + union { + __u32 mnt_ns_fd; + __u32 mnt_fd; + }; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; @@ -232,4 +237,9 @@ struct mnt_id_req { #define LSMT_ROOT 0xffffffffffffffff /* root mount */ #define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +/* + * @flag bits for statmount(2) + */ +#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From c547c51ff4d44c787330506737c5ce7808e536cc Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 19 Mar 2026 15:51:47 +0000 Subject: KVM: arm64: gic-v5: Add ARM_VGIC_V5 device to KVM headers This is the base GICv5 device which is to be used with the KVM_CREATE_DEVICE ioctl to create a GICv5-based vgic. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260319154937.3619520-9-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 2 ++ tools/include/uapi/linux/kvm.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80364d4dbebb..d0c0c8605976 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1224,6 +1224,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_PCHPIC, #define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_ARM_VGIC_V5, +#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_MAX, diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 65500f5db379..713e4360eca0 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1220,6 +1220,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_PCHPIC, #define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_ARM_VGIC_V5, +#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_MAX, -- cgit v1.2.3 From d2d7561dc656748f592cc34d34bf5db8d5c67f7b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 5 Mar 2026 10:31:42 +0100 Subject: tools uapi headers: add linux/module_signature.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This header is going to be used from scripts/sign-file. Signed-off-by: Thomas Weißschuh Reviewed-by: Petr Pavlu Reviewed-by: Nicolas Schier Signed-off-by: Sami Tolvanen --- tools/include/uapi/linux/module_signature.h | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tools/include/uapi/linux/module_signature.h (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/module_signature.h b/tools/include/uapi/linux/module_signature.h new file mode 100644 index 000000000000..634c9f1c8fc2 --- /dev/null +++ b/tools/include/uapi/linux/module_signature.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Module signature handling. + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _UAPI_LINUX_MODULE_SIGNATURE_H +#define _UAPI_LINUX_MODULE_SIGNATURE_H + +#include + +/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ +#define MODULE_SIGNATURE_MARKER "~Module signature appended~\n" + +enum module_signature_type { + MODULE_SIGNATURE_TYPE_PKCS7 = 2, /* Signature in PKCS#7 message */ +}; + +/* + * Module signature information block. + * + * The constituents of the signature section are, in order: + * + * - Signer's name + * - Key identifier + * - Signature data + * - Information block + */ +struct module_signature { + __u8 algo; /* Public-key crypto algorithm [0] */ + __u8 hash; /* Digest algorithm [0] */ + __u8 id_type; /* Key identifier type [enum module_signature_type] */ + __u8 signer_len; /* Length of signer's name [0] */ + __u8 key_id_len; /* Length of key identifier [0] */ + __u8 __pad[3]; + __be32 sig_len; /* Length of signature data */ +}; + +#endif /* _UAPI_LINUX_MODULE_SIGNATURE_H */ -- cgit v1.2.3 From 222edc843c158666d2d71793b4b7d0bbb9801883 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 26 Mar 2026 14:54:36 +0000 Subject: btf: Add BTF kind layout encoding to UAPI BTF kind layouts provide information to parse BTF kinds. By separating parsing BTF from using all the information it provides, we allow BTF to encode new features even if they cannot be used by readers. This will be helpful in particular for cases where older tools are used to parse newer BTF with kinds the older tools do not recognize; the BTF can still be parsed in such cases using kind layout. The intent is to support encoding of kind layouts optionally so that tools like pahole can add this information. For each kind, we record - length of singular element following struct btf_type - length of each of the btf_vlen() elements following - a (currently unused) flags field The ideas here were discussed at [1], [2]; hence Suggested-by: Andrii Nakryiko Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20260326145444.2076244-2-alan.maguire@oracle.com [1] https://lore.kernel.org/bpf/CAEf4BzYjWHRdNNw4B=eOXOs_ONrDwrgX4bn=Nuc1g8JPFC34MA@mail.gmail.com/ [2] https://lore.kernel.org/bpf/20230531201936.1992188-1-alan.maguire@oracle.com/ --- include/uapi/linux/btf.h | 12 ++++++++++++ tools/include/uapi/linux/btf.h | 12 ++++++++++++ 2 files changed, 24 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 266d4ffa6c07..638615ebddc2 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -8,6 +8,16 @@ #define BTF_MAGIC 0xeB9F #define BTF_VERSION 1 +/* + * BTF layout section consists of a struct btf_layout for each known + * kind at BTF encoding time. + */ +struct btf_layout { + __u8 info_sz; /* size of singular element after btf_type */ + __u8 elem_sz; /* size of each of btf_vlen(t) elements */ + __u16 flags; /* currently unused */ +}; + struct btf_header { __u16 magic; __u8 version; @@ -19,6 +29,8 @@ struct btf_header { __u32 type_len; /* length of type section */ __u32 str_off; /* offset of string section */ __u32 str_len; /* length of string section */ + __u32 layout_off; /* offset of layout section */ + __u32 layout_len; /* length of layout section */ }; /* Max # of type identifier */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 266d4ffa6c07..638615ebddc2 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -8,6 +8,16 @@ #define BTF_MAGIC 0xeB9F #define BTF_VERSION 1 +/* + * BTF layout section consists of a struct btf_layout for each known + * kind at BTF encoding time. + */ +struct btf_layout { + __u8 info_sz; /* size of singular element after btf_type */ + __u8 elem_sz; /* size of each of btf_vlen(t) elements */ + __u16 flags; /* currently unused */ +}; + struct btf_header { __u16 magic; __u8 version; @@ -19,6 +29,8 @@ struct btf_header { __u32 type_len; /* length of type section */ __u32 str_off; /* offset of string section */ __u32 str_len; /* length of string section */ + __u32 layout_off; /* offset of layout section */ + __u32 layout_len; /* length of layout section */ }; /* Max # of type identifier */ -- cgit v1.2.3 From e5bbb35a07b3b9fe7b5e70b641344db68593215d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 7 Mar 2026 09:47:52 +0100 Subject: tools headers UAPI: sync linux/taskstats.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "tools/getdelays: use the static UAPI headers from tools/include/uapi". The include directory ../../usr/include is only present if an in-tree kernel build with CONFIG_HEADERS_INSTALL was done before. Otherwise the system UAPI headers are used, which most likely are not the most recent ones. To make sure to always have access to up-to-date UAPI headers, use the static copy in tools/include/uapi. This patch (of 2): To give the accounting tools access to the new fields introduced in commit 503efe850c74 ("delayacct: add timestamp of delay max") Link: https://lkml.kernel.org/r/20260307-accounting-taskstats-h-v1-0-0b75915c6ce5@weissschuh.net Link: https://lkml.kernel.org/r/20260307-accounting-taskstats-h-v1-1-0b75915c6ce5@weissschuh.net Signed-off-by: Thomas Weißschuh Cc: Arnd Bergmann Cc: Balbir Singh Cc: Jiang Kun Cc: Wang Yaxin Cc: xu xin Cc: Yang Yang Cc: kernel test robot Signed-off-by: Andrew Morton --- tools/include/uapi/linux/taskstats.h | 291 +++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 tools/include/uapi/linux/taskstats.h (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/taskstats.h b/tools/include/uapi/linux/taskstats.h new file mode 100644 index 000000000000..3ae25f3ce067 --- /dev/null +++ b/tools/include/uapi/linux/taskstats.h @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ +/* taskstats.h - exporting per-task statistics + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * (C) Balbir Singh, IBM Corp. 2006 + * (C) Jay Lan, SGI, 2006 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef _LINUX_TASKSTATS_H +#define _LINUX_TASKSTATS_H + +#include +#include + +/* Format for per-task data returned to userland when + * - a task exits + * - listener requests stats for a task + * + * The struct is versioned. Newer versions should only add fields to + * the bottom of the struct to maintain backward compatibility. + * + * + * To add new fields + * a) bump up TASKSTATS_VERSION + * b) add comment indicating new version number at end of struct + * c) add new fields after version comment; maintain 64-bit alignment + */ + + +#define TASKSTATS_VERSION 17 +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ + +struct taskstats { + + /* The version number of this struct. This field is always set to + * TAKSTATS_VERSION, which is defined in . + * Each time the struct is changed, the value should be incremented. + */ + __u16 version; + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in + * Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP. + * (AGROUP since version 12). + */ + __u8 ac_flag; /* Record flags */ + __u8 ac_nice; /* task_nice */ + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count __attribute__((aligned(8))); + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + __u8 ac_sched __attribute__((aligned(8))); + /* Scheduling discipline */ + __u8 ac_pad[3]; + __u32 ac_uid __attribute__((aligned(8))); + /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + /* __u32 range means times from 1970 to 2106 */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + __u64 ac_etime __attribute__((aligned(8))); + /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* SYstem CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault Count */ + __u64 ac_majflt; /* Major Page Fault Count */ + /* Basic Accounting Fields end */ + + /* Extended accounting fields start */ + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. + * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + /* Accumulated virtual memory usage in duration of a task. + * Same as acct_rss_mem1 above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS and virtual memory usage in duration of + * a task, in KBytes. + */ + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ + __u64 hiwater_vm; /* High-water VM usage, in KB */ + + /* The following four fields are I/O statistics of a task. */ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + /* Extended accounting fields end */ + +#define TASKSTATS_HAS_IO_ACCOUNTING + /* Per-task storage I/O accounting starts */ + __u64 read_bytes; /* bytes of read I/O */ + __u64 write_bytes; /* bytes of write I/O */ + __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ + + __u64 nvcsw; /* voluntary_ctxt_switches */ + __u64 nivcsw; /* nonvoluntary_ctxt_switches */ + + /* time accounting for SMT machines */ + __u64 ac_utimescaled; /* utime scaled on frequency etc */ + __u64 ac_stimescaled; /* stime scaled on frequency etc */ + __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; + + + /* Delay waiting for thrashing page */ + __u64 thrashing_count; + __u64 thrashing_delay_total; + + /* v10: 64-bit btime to avoid overflow */ + __u64 ac_btime64; /* 64-bit begin time */ + + /* v11: Delay waiting for memory compact */ + __u64 compact_count; + __u64 compact_delay_total; + + /* v12 begin */ + __u32 ac_tgid; /* thread group ID */ + /* Thread group walltime up to now. This is total process walltime if + * AGROUP flag is set. + */ + __u64 ac_tgetime __attribute__((aligned(8))); + /* Lightweight information to identify process binary files. + * This leaves userspace to match this to a file system path, using + * MAJOR() and MINOR() macros to identify a device and mount point, + * the inode to identify the executable file. This is /proc/self/exe + * at the end, so matching the most recent exec(). Values are zero + * for kernel threads. + */ + __u64 ac_exe_dev; /* program binary device ID */ + __u64 ac_exe_inode; /* program binary inode number */ + /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; + + /* v14: Delay waiting for IRQ/SOFTIRQ */ + __u64 irq_count; + __u64 irq_delay_total; + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; + + /*v17: delay max timestamp record*/ + struct __kernel_timespec cpu_delay_max_ts; + struct __kernel_timespec blkio_delay_max_ts; + struct __kernel_timespec swapin_delay_max_ts; + struct __kernel_timespec freepages_delay_max_ts; + struct __kernel_timespec thrashing_delay_max_ts; + struct __kernel_timespec compact_delay_max_ts; + struct __kernel_timespec wpcopy_delay_max_ts; + struct __kernel_timespec irq_delay_max_ts; +}; + + +/* + * Commands sent from userspace + * Not versioned. New commands should only be inserted at the enum's end + * prior to __TASKSTATS_CMD_MAX + */ + +enum { + TASKSTATS_CMD_UNSPEC = 0, /* Reserved */ + TASKSTATS_CMD_GET, /* user->kernel request/get-response */ + TASKSTATS_CMD_NEW, /* kernel->user event */ + __TASKSTATS_CMD_MAX, +}; + +#define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1) + +enum { + TASKSTATS_TYPE_UNSPEC = 0, /* Reserved */ + TASKSTATS_TYPE_PID, /* Process id */ + TASKSTATS_TYPE_TGID, /* Thread group id */ + TASKSTATS_TYPE_STATS, /* taskstats structure */ + TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ + TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ + TASKSTATS_TYPE_NULL, /* contains nothing */ + __TASKSTATS_TYPE_MAX, +}; + +#define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1) + +enum { + TASKSTATS_CMD_ATTR_UNSPEC = 0, + TASKSTATS_CMD_ATTR_PID, + TASKSTATS_CMD_ATTR_TGID, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, + __TASKSTATS_CMD_ATTR_MAX, +}; + +#define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1) + +/* NETLINK_GENERIC related info */ + +#define TASKSTATS_GENL_NAME "TASKSTATS" +#define TASKSTATS_GENL_VERSION 0x1 + +#endif /* _LINUX_TASKSTATS_H */ -- cgit v1.2.3 From f9a80c7ce49e2a77b769264712fe2f59479b5f5a Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Tue, 31 Mar 2026 06:06:12 -0700 Subject: bpf: Clarify BPF_RB_NO_WAKEUP behavior for bpf_ringbuf_discard() Clarify bpf_ringbuf_discard() documentation for BPF_RB_NO_WAKEUP. Discarded ring buffer records are still left in the ring buffer and are only skipped when user space consumes them. This can matter when BPF_RB_NO_WAKEUP is used: a later submit relying on adaptive wakeup might not wake the consumer, because the discarded record still needs to be consumed first. Scenario: epoll_wait(rb_fd); // blocks rec = bpf_ringbuf_reserve(&rb, ...); bpf_ringbuf_discard(rec, BPF_RB_NO_WAKEUP); rec = bpf_ringbuf_reserve(&rb, ...); bpf_ringbuf_submit(rec, 0); // valid record, but no wakeup Document this in bpf_ringbuf_discard() to make the interaction between discarded records, user-space consumption, and adaptive wakeups explicit. Reported-by: Shmulik Ladkani Signed-off-by: Eyal Birger Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20260331130612.3762433-1-eyal.birger@gmail.com ---- v2: adapt wording per feedback from Andrii. --- include/uapi/linux/bpf.h | 4 +++- tools/include/uapi/linux/bpf.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8d400b7680a..552bc5d9afbd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4645,7 +4645,9 @@ union bpf_attr { * Description * Discard reserved ring buffer sample, pointed to by *data*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification - * of new data availability is sent. + * of new data availability is sent. Discarded records remain in + * the ring buffer until consumed by user space, so a later submit + * using adaptive wakeup might not wake up the consumer. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. * If **0** is specified in *flags*, an adaptive notification diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5e38b4887de6..677be9a47347 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4645,7 +4645,9 @@ union bpf_attr { * Description * Discard reserved ring buffer sample, pointed to by *data*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification - * of new data availability is sent. + * of new data availability is sent. Discarded records remain in + * the ring buffer until consumed by user space, so a later submit + * using adaptive wakeup might not wake up the consumer. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. * If **0** is specified in *flags*, an adaptive notification -- cgit v1.2.3 From 7f8969aa739da4d2096f2e6f87e030de6efad9dc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 31 Mar 2026 18:06:34 -0300 Subject: perf beauty: Move copy of fadvise.h from tools/include/ to tools/perf/trace/beauty/include/ As it is not really used when compiling anything, just being parsed to collect number->string tables for 'perf trace'. $ git grep fadvise.h tools/ tools/perf/Makefile.perf:$(fadvise_advice_array): $(beauty_uapi_linux_dir)/fadvise.h $(fadvise_advice_tbl) tools/perf/check-headers.sh: "include/uapi/linux/fadvise.h" tools/perf/trace/beauty/fadvise.sh:grep -E $regex ${header_dir}/fadvise.h | \ tools/perf/trace/beauty/fadvise.sh:# tools/include/uapi/linux/fadvise.h for details. $ Link: https://lore.kernel.org/r/CAP-5=fVBNQVF8k3JUQjH1nkP69ZVp8BqP+uwygcx=xO0zC4xrg@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/fadvise.h | 22 ---------------------- tools/perf/Makefile.perf | 4 ++-- tools/perf/check-headers.sh | 2 +- tools/perf/trace/beauty/fadvise.sh | 2 +- .../perf/trace/beauty/include/uapi/linux/fadvise.h | 22 ++++++++++++++++++++++ 5 files changed, 26 insertions(+), 26 deletions(-) delete mode 100644 tools/include/uapi/linux/fadvise.h create mode 100644 tools/perf/trace/beauty/include/uapi/linux/fadvise.h (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/fadvise.h b/tools/include/uapi/linux/fadvise.h deleted file mode 100644 index 0862b87434c2..000000000000 --- a/tools/include/uapi/linux/fadvise.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef FADVISE_H_INCLUDED -#define FADVISE_H_INCLUDED - -#define POSIX_FADV_NORMAL 0 /* No further special treatment. */ -#define POSIX_FADV_RANDOM 1 /* Expect random page references. */ -#define POSIX_FADV_SEQUENTIAL 2 /* Expect sequential page references. */ -#define POSIX_FADV_WILLNEED 3 /* Will need these pages. */ - -/* - * The advise values for POSIX_FADV_DONTNEED and POSIX_ADV_NOREUSE - * for s390-64 differ from the values for the rest of the world. - */ -#if defined(__s390x__) -#define POSIX_FADV_DONTNEED 6 /* Don't need these pages. */ -#define POSIX_FADV_NOREUSE 7 /* Data will be accessed once. */ -#else -#define POSIX_FADV_DONTNEED 4 /* Don't need these pages. */ -#define POSIX_FADV_NOREUSE 5 /* Data will be accessed once. */ -#endif - -#endif /* FADVISE_H_INCLUDED */ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a560fbc84793..cee19c923c06 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -556,8 +556,8 @@ $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_t fadvise_advice_array := $(beauty_outdir)/fadvise_advice_array.c fadvise_advice_tbl := $(srctree)/tools/perf/trace/beauty/fadvise.sh -$(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl) - $(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@ +$(fadvise_advice_array): $(beauty_uapi_linux_dir)/fadvise.h $(fadvise_advice_tbl) + $(Q)$(SHELL) '$(fadvise_advice_tbl)' $(beauty_uapi_linux_dir) > $@ fsmount_arrays := $(beauty_outdir)/fsmount_arrays.c fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c6b136fe8d13..531c0e0e84df 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -7,7 +7,6 @@ NC='\033[0m' # No Color declare -a FILES=( "include/uapi/linux/const.h" "include/uapi/linux/bits.h" - "include/uapi/linux/fadvise.h" "include/uapi/linux/fscrypt.h" "include/uapi/linux/genetlink.h" "include/uapi/linux/if_addr.h" @@ -91,6 +90,7 @@ declare -a BEAUTY_FILES=( "include/uapi/drm/drm.h" "include/uapi/drm/i915_drm.h" "include/linux/socket.h" + "include/uapi/linux/fadvise.h" "include/uapi/linux/fcntl.h" "include/uapi/linux/fs.h" "include/uapi/linux/mount.h" diff --git a/tools/perf/trace/beauty/fadvise.sh b/tools/perf/trace/beauty/fadvise.sh index 4d3dd6e56ded..e9857112fa51 100755 --- a/tools/perf/trace/beauty/fadvise.sh +++ b/tools/perf/trace/beauty/fadvise.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: LGPL-2.1 -[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/uapi/linux/ printf "static const char *fadvise_advices[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+POSIX_FADV_(\w+)[[:space:]]+([[:digit:]]+)[[:space:]]+.*' diff --git a/tools/perf/trace/beauty/include/uapi/linux/fadvise.h b/tools/perf/trace/beauty/include/uapi/linux/fadvise.h new file mode 100644 index 000000000000..0862b87434c2 --- /dev/null +++ b/tools/perf/trace/beauty/include/uapi/linux/fadvise.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef FADVISE_H_INCLUDED +#define FADVISE_H_INCLUDED + +#define POSIX_FADV_NORMAL 0 /* No further special treatment. */ +#define POSIX_FADV_RANDOM 1 /* Expect random page references. */ +#define POSIX_FADV_SEQUENTIAL 2 /* Expect sequential page references. */ +#define POSIX_FADV_WILLNEED 3 /* Will need these pages. */ + +/* + * The advise values for POSIX_FADV_DONTNEED and POSIX_ADV_NOREUSE + * for s390-64 differ from the values for the rest of the world. + */ +#if defined(__s390x__) +#define POSIX_FADV_DONTNEED 6 /* Don't need these pages. */ +#define POSIX_FADV_NOREUSE 7 /* Data will be accessed once. */ +#else +#define POSIX_FADV_DONTNEED 4 /* Don't need these pages. */ +#define POSIX_FADV_NOREUSE 5 /* Data will be accessed once. */ +#endif + +#endif /* FADVISE_H_INCLUDED */ -- cgit v1.2.3 From 7789c6bb76acf21539c2c74b0cc869bb57de99e6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:18 +0200 Subject: net: Add queue-create operation Add a ynl netdev family operation called queue-create that creates a new queue on a netdevice: name: queue-create attribute-set: queue flags: [admin-perm] do: request: attributes: - ifindex - type - lease reply: &queue-create-op attributes: - id This is a generic operation such that it can be extended for various use cases in future. Right now it is mandatory to specify ifindex, the queue type which is enforced to rx and a lease. The newly created queue id is returned to the caller. A queue from a virtual device can have a lease which refers to another queue from a physical device. This is useful for memory providers and AF_XDP operations which take an ifindex and queue id to allow applications to bind against virtual devices in containers. The lease couples both queues together and allows to proxy the operations from a virtual device in a container to the physical device. In future, the nested lease attribute can be lifted and made optional for other use-cases such as dynamic queue creation for physical netdevs. The lack of lease and the specification of the physical device as an ifindex will imply that we need a real queue to be allocated. Similarly, the queue type enforcement to rx can then be lifted as well to support tx. An early implementation had only driver-specific integration [0], but in order for other virtual devices to reuse, it makes sense to have this as a generic API in core net. For leasing queues, the virtual netdev must have real_num_rx_queues less than num_rx_queues at the time of calling queue-create. The queue-type must be rx as only rx queues are supported for leasing for now. We also enforce that the queue-create ifindex must point to a virtual device, and that the nested lease attribute's ifindex must point to a physical device. The nested lease attribute set contains a netns-id attribute which is optional and can specify a netns-id relative to the caller's netns. It requires cap_net_admin and if the netns-id attribute is not specified, the lease ifindex will be retrieved from the current netns. Also, it is modeled as an s32 type similarly as done elsewhere in the stack. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0] Link: https://patch.msgid.link/20260402231031.447597-2-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 46 +++++++++++++++++++++++++++++++++ include/uapi/linux/netdev.h | 11 ++++++++ net/core/netdev-genl-gen.c | 20 ++++++++++++++ net/core/netdev-genl-gen.h | 2 ++ net/core/netdev-genl.c | 5 ++++ tools/include/uapi/linux/netdev.h | 11 ++++++++ 6 files changed, 95 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 596c306ce52b..b93beb247a11 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -339,6 +339,15 @@ attribute-sets: doc: XSK information for this queue, if any. type: nest nested-attributes: xsk-info + - + name: lease + doc: | + A queue from a virtual device can have a lease which refers to + another queue from a physical device. This is useful for memory + providers and AF_XDP operations which take an ifindex and queue id + to allow applications to bind against virtual devices in containers. + type: nest + nested-attributes: lease - name: qstats doc: | @@ -537,6 +546,26 @@ attribute-sets: name: id - name: type + - + name: lease + attributes: + - + name: ifindex + doc: The netdev ifindex to lease the queue from. + type: u32 + checks: + min: 1 + - + name: queue + doc: The netdev queue to lease from. + type: nest + nested-attributes: queue-id + - + name: netns-id + doc: The network namespace id of the netdev. + type: s32 + checks: + min: 0 - name: dmabuf attributes: @@ -686,6 +715,7 @@ operations: - dmabuf - io-uring - xsk + - lease dump: request: attributes: @@ -797,6 +827,22 @@ operations: reply: attributes: - id + - + name: queue-create + doc: | + Create a new queue for the given netdevice. Whether this operation + is supported depends on the device and the driver. + attribute-set: queue + flags: [admin-perm] + do: + request: + attributes: + - ifindex + - type + - lease + reply: &queue-create-op + attributes: + - id kernel-family: headers: ["net/netdev_netlink.h"] diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e0b579a1df4f..7df1056a35fd 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -160,6 +160,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -202,6 +203,15 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_LEASE_IFINDEX = 1, + NETDEV_A_LEASE_QUEUE, + NETDEV_A_LEASE_NETNS_ID, + + __NETDEV_A_LEASE_MAX, + NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) +}; + enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -228,6 +238,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, + NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index ba673e81716f..81aecb5d3bc5 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -28,6 +28,12 @@ static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range }; /* Common nested types */ +const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = { + [NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), + [NETDEV_A_LEASE_NETNS_ID] = NLA_POLICY_MIN(NLA_S32, 0), +}; + const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), @@ -107,6 +113,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, }; +/* NETDEV_CMD_QUEUE_CREATE - do */ +static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -205,6 +218,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_DMABUF_FD, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_QUEUE_CREATE, + .doit = netdev_nl_queue_create_doit, + .policy = netdev_queue_create_nl_policy, + .maxattr = NETDEV_A_QUEUE_LEASE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index cffc08517a41..d71b435d72c1 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -14,6 +14,7 @@ #include /* Common nested types */ +extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1]; extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1]; @@ -36,6 +37,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 470fabbeacd9..aae75431858d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -1120,6 +1120,11 @@ err_genlmsg_free: return err; } +int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e0b579a1df4f..7df1056a35fd 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -160,6 +160,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -202,6 +203,15 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_LEASE_IFINDEX = 1, + NETDEV_A_LEASE_QUEUE, + NETDEV_A_LEASE_NETNS_ID, + + __NETDEV_A_LEASE_MAX, + NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) +}; + enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -228,6 +238,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, + NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3