From 86de56487e5f0017ffd5930b0dbd9dda43048849 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 30 Jul 2025 11:58:52 -0700 Subject: bpf: Allow syscall bpf programs to call non-recur helpers Allow syscall programs to call non-recur helpers too since syscall bpf programs runs in process context through bpf syscall, BPF_PROG_TEST_RUN, and cannot run recursively. bpf_task_storage_{get,set} have "_recur" versions that call trylock instead of taking the lock directly to avoid deadlock when called by bpf programs that run recursively. Currently, only bpf_lsm, bpf_iter, struct_ops without private stack are allow to call the non-recur helpers since they cannot be recursively called in another bpf program. Signed-off-by: Amery Hung Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20250730185903.3574598-2-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 94defa405c85..c823f8efe3ed 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -962,6 +962,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) case BPF_PROG_TYPE_STRUCT_OPS: return prog->aux->jits_use_priv_stack; case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_SYSCALL: return false; default: return true; -- cgit v1.2.3 From d87a513d093726d121dd5c816e26803111a259d0 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 6 Aug 2025 09:25:38 -0700 Subject: bpf: Allow struct_ops to get map id by kdata Add bpf_struct_ops_id() to enable struct_ops implementors to use struct_ops map id as the unique id of a struct_ops in their subsystem. A subsystem that wishes to create a mapping between id and struct_ops instance pointer can update the mapping accordingly during bpf_struct_ops::reg(), unreg(), and update(). Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250806162540.681679-2-ameryhung@gmail.com --- include/linux/bpf.h | 1 + kernel/bpf/bpf_struct_ops.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..e7ee089e8a31 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1985,6 +1985,7 @@ static inline void bpf_module_put(const void *data, struct module *owner) module_put(owner); } int bpf_struct_ops_link_create(union bpf_attr *attr); +u32 bpf_struct_ops_id(const void *kdata); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 687a3e9c76f5..a41e6730edcf 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -1174,6 +1174,18 @@ void bpf_struct_ops_put(const void *kdata) bpf_map_put(&st_map->map); } +u32 bpf_struct_ops_id(const void *kdata) +{ + struct bpf_struct_ops_value *kvalue; + struct bpf_struct_ops_map *st_map; + + kvalue = container_of(kdata, struct bpf_struct_ops_value, data); + st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); + + return st_map->map.id; +} +EXPORT_SYMBOL_GPL(bpf_struct_ops_id); + static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; -- cgit v1.2.3 From 17e8b7e08fa8bf7a936f70444a42a88750410251 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jul 2025 09:48:54 +0200 Subject: fs: mark file_remove_privs_flags static file_remove_privs_flags is only used inside of inode.c, mark it static. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250724074854.3316911-1-hch@lst.de Signed-off-by: Christian Brauner --- fs/inode.c | 3 +-- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 01ebdc40021e..3cbb78412a3e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2189,7 +2189,7 @@ static int __remove_privs(struct mnt_idmap *idmap, return notify_change(idmap, dentry, &newattrs, NULL); } -int file_remove_privs_flags(struct file *file, unsigned int flags) +static int file_remove_privs_flags(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); @@ -2214,7 +2214,6 @@ int file_remove_privs_flags(struct file *file, unsigned int flags) inode_has_no_xattr(inode); return error; } -EXPORT_SYMBOL_GPL(file_remove_privs_flags); /** * file_remove_privs - remove special file privileges (suid, capabilities) diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..796319914b0a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3393,7 +3393,6 @@ static inline struct inode *new_inode_pseudo(struct super_block *sb) extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); -extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); -- cgit v1.2.3 From 4e021920812d164bb02c30cc40e08a3681b1c755 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Wed, 30 Jul 2025 20:18:53 +0000 Subject: fs: document 'name' parameter for name_contains_dotdot() The kernel-doc for name_contains_dotdot() was missing the @name parameter description, leading to a warning during make htmldocs. Add the missing documentation to resolve this warning. Signed-off-by: Kriish Sharma Link: https://lore.kernel.org/20250730201853.8436-1-kriish.sharma2006@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 796319914b0a..780e9c774c54 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3281,7 +3281,7 @@ static inline bool is_dot_dotdot(const char *name, size_t len) /** * name_contains_dotdot - check if a file name contains ".." path components - * + * @name: File path string to check * Search for ".." surrounded by either '/' or start/end of string. */ static inline bool name_contains_dotdot(const char *name) -- cgit v1.2.3 From 56ecfd9175b999dfc303ac6a0f9ea4bd1bee49d7 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 23 Jul 2025 14:21:54 +0100 Subject: fs: Remove mount_nodev mount_nodev has had no in-tree users since cc0876f817d6 ("vfs: Convert devpts to use the new mount API"). Remove it. Signed-off-by: Pedro Falcato Link: https://lore.kernel.org/20250723132156.225410-2-pfalcato@suse.de Signed-off-by: Christian Brauner --- fs/super.c | 20 -------------------- include/linux/fs.h | 3 --- 2 files changed, 23 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 7f876f32343a..7daa20737f2e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1773,26 +1773,6 @@ void kill_block_super(struct super_block *sb) EXPORT_SYMBOL(kill_block_super); #endif -struct dentry *mount_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) -{ - int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); - - if (IS_ERR(s)) - return ERR_CAST(s); - - error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - s->s_flags |= SB_ACTIVE; - return dget(s->s_root); -} -EXPORT_SYMBOL(mount_nodev); - /** * vfs_get_tree - Get the mountable root * @fc: The superblock configuration context. diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..204328ed7ebb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2716,9 +2716,6 @@ static inline bool is_mgtime(const struct inode *inode) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -extern struct dentry *mount_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); -- cgit v1.2.3 From f7d161c2804f3ad36bdc3222cb93c8fee67be98c Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 23 Jul 2025 14:21:55 +0100 Subject: fs: Remove mount_bdev mount_bdev has no in-tree users ever since f2fs adopted the new mount API. Remove it. Signed-off-by: Pedro Falcato Link: https://lore.kernel.org/20250723132156.225410-3-pfalcato@suse.de Signed-off-by: Christian Brauner --- fs/super.c | 43 ------------------------------------------- include/linux/fs.h | 3 --- 2 files changed, 46 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 7daa20737f2e..a038848e8d1f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1716,49 +1716,6 @@ int get_tree_bdev(struct fs_context *fc, } EXPORT_SYMBOL(get_tree_bdev); -static int test_bdev_super(struct super_block *s, void *data) -{ - return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; -} - -struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) -{ - struct super_block *s; - int error; - dev_t dev; - - error = lookup_bdev(dev_name, &dev); - if (error) - return ERR_PTR(error); - - flags |= SB_NOSEC; - s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev); - if (IS_ERR(s)) - return ERR_CAST(s); - - if (s->s_root) { - if ((flags ^ s->s_flags) & SB_RDONLY) { - deactivate_locked_super(s); - return ERR_PTR(-EBUSY); - } - } else { - error = setup_bdev_super(s, flags, NULL); - if (!error) - error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - - s->s_flags |= SB_ACTIVE; - } - - return dget(s->s_root); -} -EXPORT_SYMBOL(mount_bdev); - void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; diff --git a/include/linux/fs.h b/include/linux/fs.h index 204328ed7ebb..98afcf455b28 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2713,9 +2713,6 @@ static inline bool is_mgtime(const struct inode *inode) return inode->i_opflags & IOP_MGTIME; } -extern struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); -- cgit v1.2.3 From ad7fe23b4b0dc0c26187df92a5649948ef7049fa Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 6 Aug 2025 16:07:05 +1000 Subject: fscontext: add custom-prefix log helpers Sometimes, errors associated with an fscontext come from the VFS or otherwise outside of the filesystem driver itself. However, the default logging of errorfc will always prefix the message with the filesystem name. So, add some *fcp() wrappers that allow for custom prefixes to be used when emitting information to the fscontext log. Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250806-errorfc-mount-too-revealing-v2-1-534b9b4d45bb@cyphar.com Signed-off-by: Christian Brauner --- include/linux/fs_context.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7773eb870039..671f031be173 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -186,10 +186,12 @@ struct fc_log { extern __attribute__((format(printf, 4, 5))) void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...); -#define __logfc(fc, l, fmt, ...) logfc((fc)->log.log, NULL, \ - l, fmt, ## __VA_ARGS__) -#define __plog(p, l, fmt, ...) logfc((p)->log, (p)->prefix, \ - l, fmt, ## __VA_ARGS__) +#define __logfc(fc, l, fmt, ...) \ + logfc((fc)->log.log, NULL, (l), (fmt), ## __VA_ARGS__) +#define __plogp(p, prefix, l, fmt, ...) \ + logfc((p)->log, (prefix), (l), (fmt), ## __VA_ARGS__) +#define __plog(p, l, fmt, ...) __plogp(p, (p)->prefix, l, fmt, ## __VA_ARGS__) + /** * infof - Store supplementary informational message * @fc: The context in which to log the informational message @@ -201,6 +203,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) #define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) +#define infofcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'i', fmt, ## __VA_ARGS__) /** * warnf - Store supplementary warning message @@ -213,6 +217,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define warnf(fc, fmt, ...) __logfc(fc, 'w', fmt, ## __VA_ARGS__) #define warn_plog(p, fmt, ...) __plog(p, 'w', fmt, ## __VA_ARGS__) #define warnfc(fc, fmt, ...) __plog((&(fc)->log), 'w', fmt, ## __VA_ARGS__) +#define warnfcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'w', fmt, ## __VA_ARGS__) /** * errorf - Store supplementary error message @@ -225,6 +231,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define errorf(fc, fmt, ...) __logfc(fc, 'e', fmt, ## __VA_ARGS__) #define error_plog(p, fmt, ...) __plog(p, 'e', fmt, ## __VA_ARGS__) #define errorfc(fc, fmt, ...) __plog((&(fc)->log), 'e', fmt, ## __VA_ARGS__) +#define errorfcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'e', fmt, ## __VA_ARGS__) /** * invalf - Store supplementary invalid argument error message @@ -237,5 +245,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define invalf(fc, fmt, ...) (errorf(fc, fmt, ## __VA_ARGS__), -EINVAL) #define inval_plog(p, fmt, ...) (error_plog(p, fmt, ## __VA_ARGS__), -EINVAL) #define invalfc(fc, fmt, ...) (errorfc(fc, fmt, ## __VA_ARGS__), -EINVAL) +#define invalfcp(fc, prefix, fmt, ...) \ + (errorfcp(fc, prefix, fmt, ## __VA_ARGS__), -EINVAL) #endif /* _LINUX_FS_CONTEXT_H */ -- cgit v1.2.3 From bb2441402392ef1f49563be68e8f0dcb127ac965 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 5 Aug 2025 22:40:56 +0300 Subject: regulator: add s2dos05 regulator support S2DOS05 has 1 buck and 4 LDO regulators, used for powering panel/touchscreen. Signed-off-by: Dzmitry Sankouski Link: https://patch.msgid.link/20250805-starqltechn_integration_upstream-v8-1-09d8a321fafe@gmail.com Signed-off-by: Mark Brown --- MAINTAINERS | 2 +- drivers/regulator/Kconfig | 8 ++ drivers/regulator/Makefile | 1 + drivers/regulator/s2dos05-regulator.c | 165 ++++++++++++++++++++++++++++++++++ include/linux/regulator/s2dos05.h | 73 +++++++++++++++ 5 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 drivers/regulator/s2dos05-regulator.c create mode 100644 include/linux/regulator/s2dos05.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..f35f9b503956 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22387,7 +22387,7 @@ F: Documentation/devicetree/bindings/regulator/samsung,s2m*.yaml F: Documentation/devicetree/bindings/regulator/samsung,s5m*.yaml F: drivers/clk/clk-s2mps11.c F: drivers/mfd/sec*.[ch] -F: drivers/regulator/s2m*.c +F: drivers/regulator/s2*.c F: drivers/regulator/s5m*.c F: drivers/rtc/rtc-s5m.c F: include/linux/mfd/samsung/ diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index eaa6df1c9f80..2399c8a9a1c5 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1344,6 +1344,14 @@ config REGULATOR_RTQ2208 and two ldos. It features wide output voltage range from 0.4V to 2.05V and the capability to configure the corresponding power stages. +config REGULATOR_S2DOS05 + tristate "Samsung S2DOS05 voltage regulator" + depends on MFD_SEC_CORE || COMPILE_TEST + help + This driver provides support for the voltage regulators of the S2DOS05. + The S2DOS05 is a companion power management IC for the smart phones. + The S2DOS05 has 4 LDOs and 1 BUCK outputs. + config REGULATOR_S2MPA01 tristate "Samsung S2MPA01 voltage regulator" depends on MFD_SEC_CORE || COMPILE_TEST diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index be98b29d6675..78605b0fa0b2 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -156,6 +156,7 @@ obj-$(CONFIG_REGULATOR_RTMV20) += rtmv20-regulator.o obj-$(CONFIG_REGULATOR_RTQ2134) += rtq2134-regulator.o obj-$(CONFIG_REGULATOR_RTQ6752) += rtq6752-regulator.o obj-$(CONFIG_REGULATOR_RTQ2208) += rtq2208-regulator.o +obj-$(CONFIG_REGULATOR_S2DOS05) += s2dos05-regulator.o obj-$(CONFIG_REGULATOR_S2MPA01) += s2mpa01.o obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o obj-$(CONFIG_REGULATOR_S5M8767) += s5m8767.o diff --git a/drivers/regulator/s2dos05-regulator.c b/drivers/regulator/s2dos05-regulator.c new file mode 100644 index 000000000000..1463585c4565 --- /dev/null +++ b/drivers/regulator/s2dos05-regulator.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// s2dos05.c - Regulator driver for the Samsung s2dos05 +// +// Copyright (C) 2025 Dzmitry Sankouski + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct s2dos05_data { + struct regmap *regmap; + struct device *dev; +}; + +#define _BUCK(macro) S2DOS05_BUCK##macro +#define _buck_ops(num) s2dos05_ops##num +#define _LDO(macro) S2DOS05_LDO##macro +#define _REG(ctrl) S2DOS05_REG##ctrl +#define _ldo_ops(num) s2dos05_ops##num +#define _MASK(macro) S2DOS05_ENABLE_MASK##macro +#define _TIME(macro) S2DOS05_ENABLE_TIME##macro + +#define BUCK_DESC(_name, _id, _ops, m, s, v, e, em, t, a) { \ + .name = _name, \ + .id = _id, \ + .ops = _ops, \ + .of_match = of_match_ptr(_name), \ + .of_match_full_name = true, \ + .regulators_node = of_match_ptr("regulators"), \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = m, \ + .uV_step = s, \ + .n_voltages = S2DOS05_BUCK_N_VOLTAGES, \ + .vsel_reg = v, \ + .vsel_mask = S2DOS05_BUCK_VSEL_MASK, \ + .enable_reg = e, \ + .enable_mask = em, \ + .enable_time = t, \ + .active_discharge_off = 0, \ + .active_discharge_on = S2DOS05_BUCK_FD_MASK, \ + .active_discharge_reg = a, \ + .active_discharge_mask = S2DOS05_BUCK_FD_MASK \ +} + +#define LDO_DESC(_name, _id, _ops, m, s, v, e, em, t, a) { \ + .name = _name, \ + .id = _id, \ + .ops = _ops, \ + .of_match = of_match_ptr(_name), \ + .of_match_full_name = true, \ + .regulators_node = of_match_ptr("regulators"), \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = m, \ + .uV_step = s, \ + .n_voltages = S2DOS05_LDO_N_VOLTAGES, \ + .vsel_reg = v, \ + .vsel_mask = S2DOS05_LDO_VSEL_MASK, \ + .enable_reg = e, \ + .enable_mask = em, \ + .enable_time = t, \ + .active_discharge_off = 0, \ + .active_discharge_on = S2DOS05_LDO_FD_MASK, \ + .active_discharge_reg = a, \ + .active_discharge_mask = S2DOS05_LDO_FD_MASK \ +} + +static const struct regulator_ops s2dos05_ops = { + .list_voltage = regulator_list_voltage_linear, + .map_voltage = regulator_map_voltage_linear, + .is_enabled = regulator_is_enabled_regmap, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .set_active_discharge = regulator_set_active_discharge_regmap, +}; + +static const struct regulator_desc regulators[S2DOS05_REGULATOR_MAX] = { + // name, id, ops, min_uv, uV_step, vsel_reg, enable_reg + LDO_DESC("ldo1", _LDO(1), &_ldo_ops(), _LDO(_MIN1), + _LDO(_STEP1), _REG(_LDO1_CFG), + _REG(_EN), _MASK(_L1), _TIME(_LDO), _REG(_LDO1_CFG)), + LDO_DESC("ldo2", _LDO(2), &_ldo_ops(), _LDO(_MIN1), + _LDO(_STEP1), _REG(_LDO2_CFG), + _REG(_EN), _MASK(_L2), _TIME(_LDO), _REG(_LDO2_CFG)), + LDO_DESC("ldo3", _LDO(3), &_ldo_ops(), _LDO(_MIN2), + _LDO(_STEP1), _REG(_LDO3_CFG), + _REG(_EN), _MASK(_L3), _TIME(_LDO), _REG(_LDO3_CFG)), + LDO_DESC("ldo4", _LDO(4), &_ldo_ops(), _LDO(_MIN2), + _LDO(_STEP1), _REG(_LDO4_CFG), + _REG(_EN), _MASK(_L4), _TIME(_LDO), _REG(_LDO4_CFG)), + BUCK_DESC("buck", _BUCK(1), &_buck_ops(), _BUCK(_MIN1), + _BUCK(_STEP1), _REG(_BUCK_VOUT), + _REG(_EN), _MASK(_B1), _TIME(_BUCK), _REG(_BUCK_CFG)), +}; + +static int s2dos05_pmic_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent); + struct s2dos05_data *s2dos05; + struct regulator_config config = { }; + unsigned int rdev_num = ARRAY_SIZE(regulators); + + s2dos05 = devm_kzalloc(dev, sizeof(*s2dos05), GFP_KERNEL); + if (!s2dos05) + return -ENOMEM; + + platform_set_drvdata(pdev, s2dos05); + + s2dos05->regmap = iodev->regmap_pmic; + s2dos05->dev = dev; + if (!dev->of_node) + dev->of_node = dev->parent->of_node; + + config.dev = dev; + config.driver_data = s2dos05; + + for (int i = 0; i < rdev_num; i++) { + struct regulator_dev *regulator; + + regulator = devm_regulator_register(&pdev->dev, + ®ulators[i], &config); + if (IS_ERR(regulator)) { + return dev_err_probe(&pdev->dev, PTR_ERR(regulator), + "regulator init failed for %d\n", i); + } + } + + return 0; +} + +static const struct platform_device_id s2dos05_pmic_id[] = { + { "s2dos05-regulator" }, + { }, +}; +MODULE_DEVICE_TABLE(platform, s2dos05_pmic_id); + +static struct platform_driver s2dos05_platform_driver = { + .driver = { + .name = "s2dos05", + }, + .probe = s2dos05_pmic_probe, + .id_table = s2dos05_pmic_id, +}; +module_platform_driver(s2dos05_platform_driver); + +MODULE_AUTHOR("Dzmitry Sankouski "); +MODULE_DESCRIPTION("Samsung S2DOS05 Regulator Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/s2dos05.h b/include/linux/regulator/s2dos05.h new file mode 100644 index 000000000000..2e89fcbce769 --- /dev/null +++ b/include/linux/regulator/s2dos05.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// s2dos05.h +// +// Copyright (c) 2016 Samsung Electronics Co., Ltd +// http://www.samsung.com +// Copyright (C) 2024 Dzmitry Sankouski + +#ifndef __LINUX_S2DOS05_H +#define __LINUX_S2DOS05_H + +// S2DOS05 registers +// Slave Addr : 0xC0 +enum S2DOS05_reg { + S2DOS05_REG_DEV_ID, + S2DOS05_REG_TOPSYS_STAT, + S2DOS05_REG_STAT, + S2DOS05_REG_EN, + S2DOS05_REG_LDO1_CFG, + S2DOS05_REG_LDO2_CFG, + S2DOS05_REG_LDO3_CFG, + S2DOS05_REG_LDO4_CFG, + S2DOS05_REG_BUCK_CFG, + S2DOS05_REG_BUCK_VOUT, + S2DOS05_REG_IRQ_MASK = 0x0D, + S2DOS05_REG_SSD_TSD = 0x0E, + S2DOS05_REG_OCL = 0x10, + S2DOS05_REG_IRQ = 0x11 +}; + +// S2DOS05 regulator ids +enum S2DOS05_regulators { + S2DOS05_LDO1, + S2DOS05_LDO2, + S2DOS05_LDO3, + S2DOS05_LDO4, + S2DOS05_BUCK1, + S2DOS05_REG_MAX, +}; + +#define S2DOS05_IRQ_PWRMT_MASK BIT(5) +#define S2DOS05_IRQ_TSD_MASK BIT(4) +#define S2DOS05_IRQ_SSD_MASK BIT(3) +#define S2DOS05_IRQ_SCP_MASK BIT(2) +#define S2DOS05_IRQ_UVLO_MASK BIT(1) +#define S2DOS05_IRQ_OCD_MASK BIT(0) + +#define S2DOS05_BUCK_MIN1 506250 +#define S2DOS05_LDO_MIN1 1500000 +#define S2DOS05_LDO_MIN2 2700000 +#define S2DOS05_BUCK_STEP1 6250 +#define S2DOS05_LDO_STEP1 25000 +#define S2DOS05_LDO_VSEL_MASK 0x7F +#define S2DOS05_LDO_FD_MASK 0x80 +#define S2DOS05_BUCK_VSEL_MASK 0xFF +#define S2DOS05_BUCK_FD_MASK 0x08 + +#define S2DOS05_ENABLE_MASK_L1 BIT(0) +#define S2DOS05_ENABLE_MASK_L2 BIT(1) +#define S2DOS05_ENABLE_MASK_L3 BIT(2) +#define S2DOS05_ENABLE_MASK_L4 BIT(3) +#define S2DOS05_ENABLE_MASK_B1 BIT(4) + +#define S2DOS05_RAMP_DELAY 12000 + +#define S2DOS05_ENABLE_TIME_LDO 50 +#define S2DOS05_ENABLE_TIME_BUCK 350 + +#define S2DOS05_LDO_N_VOLTAGES (S2DOS05_LDO_VSEL_MASK + 1) +#define S2DOS05_BUCK_N_VOLTAGES (S2DOS05_BUCK_VSEL_MASK + 1) + +#define S2DOS05_REGULATOR_MAX (S2DOS05_REG_MAX) + +#endif // __LINUX_S2DOS05_H -- cgit v1.2.3 From 181fe022ecf8a8e85def0e94852c631c59a8b3f6 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:44 +0200 Subject: gpiolib: add support to register sparse pin range Add support to register for GPIO<->pin mapping using a list of non consecutive pins. The core already supports sparse pin range (pins member of struct pinctrl_gpio_range), but it was not possible to register one. If pins is not NULL the core uses it, otherwise it assumes that a consecutive pin range was registered and it uses pin_base. The function gpiochip_add_pin_range() which allocates and fills the struct pinctrl_gpio_range was renamed to gpiochip_add_pin_range_with_pins() and the pins parameter was added. Two new functions were added, gpiochip_add_pin_range() and gpiochip_add_sparse_pin_range() to register a consecutive or sparse pins range. Both use gpiochip_add_pin_range_with_pins(). Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-1-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 29 ++++++++++++++++++-------- include/linux/gpio/driver.h | 51 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 0d2b470a252e..98d2fa602490 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2349,11 +2349,13 @@ int gpiochip_add_pingroup_range(struct gpio_chip *gc, EXPORT_SYMBOL_GPL(gpiochip_add_pingroup_range); /** - * gpiochip_add_pin_range() - add a range for GPIO <-> pin mapping + * gpiochip_add_pin_range_with_pins() - add a range for GPIO <-> pin mapping * @gc: the gpiochip to add the range for * @pinctl_name: the dev_name() of the pin controller to map to * @gpio_offset: the start offset in the current gpio_chip number space * @pin_offset: the start offset in the pin controller number space + * @pins: the list of non consecutive pins to accumulate in this range (if not + * NULL, pin_offset is ignored by pinctrl core) * @npins: the number of pins from the offset of each pin space (GPIO and * pin controller) to accumulate in this range * @@ -2365,9 +2367,12 @@ EXPORT_SYMBOL_GPL(gpiochip_add_pingroup_range); * Returns: * 0 on success, or a negative errno on failure. */ -int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, - unsigned int gpio_offset, unsigned int pin_offset, - unsigned int npins) +int gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int const *pins, + unsigned int npins) { struct gpio_pin_range *pin_range; struct gpio_device *gdev = gc->gpiodev; @@ -2385,6 +2390,7 @@ int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, pin_range->range.name = gc->label; pin_range->range.base = gdev->base + gpio_offset; pin_range->range.pin_base = pin_offset; + pin_range->range.pins = pins; pin_range->range.npins = npins; pin_range->pctldev = pinctrl_find_and_add_gpio_range(pinctl_name, &pin_range->range); @@ -2394,16 +2400,21 @@ int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, kfree(pin_range); return ret; } - chip_dbg(gc, "created GPIO range %d->%d ==> %s PIN %d->%d\n", - gpio_offset, gpio_offset + npins - 1, - pinctl_name, - pin_offset, pin_offset + npins - 1); + if (pin_range->range.pins) + chip_dbg(gc, "created GPIO range %d->%d ==> %s %d sparse PIN range { %d, ... }", + gpio_offset, gpio_offset + npins - 1, + pinctl_name, npins, pins[0]); + else + chip_dbg(gc, "created GPIO range %d->%d ==> %s PIN %d->%d\n", + gpio_offset, gpio_offset + npins - 1, + pinctl_name, + pin_offset, pin_offset + npins - 1); list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } -EXPORT_SYMBOL_GPL(gpiochip_add_pin_range); +EXPORT_SYMBOL_GPL(gpiochip_add_pin_range_with_pins); /** * gpiochip_remove_pin_ranges() - remove all the GPIO <-> pin mappings diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 667f8fd58a79..9fcd4a988081 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -772,16 +772,50 @@ struct gpio_pin_range { #ifdef CONFIG_PINCTRL -int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, - unsigned int gpio_offset, unsigned int pin_offset, - unsigned int npins); +int gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int const *pins, + unsigned int npins); int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group); void gpiochip_remove_pin_ranges(struct gpio_chip *gc); +static inline int +gpiochip_add_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int npins) +{ + return gpiochip_add_pin_range_with_pins(gc, pinctl_name, gpio_offset, + pin_offset, NULL, npins); +} + +static inline int +gpiochip_add_sparse_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int const *pins, + unsigned int npins) +{ + return gpiochip_add_pin_range_with_pins(gc, pinctl_name, gpio_offset, 0, + pins, npins); +} #else /* ! CONFIG_PINCTRL */ +static inline int +gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int npins) +{ + return 0; +} + static inline int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, @@ -789,6 +823,17 @@ gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, { return 0; } + +static inline int +gpiochip_add_sparse_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int const *pins, + unsigned int npins) +{ + return 0; +} + static inline int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, -- cgit v1.2.3 From 6e986f8852f56cf9214ea2ec02b4b432e201d02c Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:49 +0200 Subject: gpio: aggregator: export symbols of the GPIO forwarder library Export all symbols and create header file for the GPIO forwarder library. It will be used in the next changes. Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-6-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 202 +++++++++++++++++++++++++++++++++++++++-- include/linux/gpio/forwarder.h | 37 ++++++++ 2 files changed, 233 insertions(+), 6 deletions(-) create mode 100644 include/linux/gpio/forwarder.h (limited to 'include/linux') diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index cc54d19f2dd1..c1952b28b7b7 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include +#include #include #include "dev-sync-probe.h" @@ -475,8 +477,180 @@ static int gpiochip_fwd_setup_delay_line(struct gpiochip_fwd *fwd) } #endif /* !CONFIG_OF_GPIO */ -static struct gpiochip_fwd * -devm_gpiochip_fwd_alloc(struct device *dev, unsigned int ngpios) +/** + * gpiochip_fwd_get_gpiochip - Get the GPIO chip for the GPIO forwarder + * @fwd: GPIO forwarder + * + * Returns: The GPIO chip for the GPIO forwarder + */ +struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd) +{ + return &fwd->chip; +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_get_gpiochip, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_get_direction - Return the current direction of a GPIO forwarder line + * @fwd: GPIO forwarder + * @offset: the offset of the line + * + * Returns: 0 for output, 1 for input, or an error code in case of error. + */ +int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_get_direction(gc, offset); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_get_direction, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_direction_output - Set a GPIO forwarder line direction to + * output + * @fwd: GPIO forwarder + * @offset: the offset of the line + * @value: value to set + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_gpio_direction_output(struct gpiochip_fwd *fwd, unsigned int offset, + int value) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_direction_output(gc, offset, value); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_direction_output, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_direction_input - Set a GPIO forwarder line direction to input + * @fwd: GPIO forwarder + * @offset: the offset of the line + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, unsigned int offset) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_direction_input(gc, offset); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_direction_input, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_get - Return a GPIO forwarder line's value + * @fwd: GPIO forwarder + * @offset: the offset of the line + * + * Returns: The GPIO's logical value, i.e. taking the ACTIVE_LOW status into + * account, or negative errno on failure. + */ +int gpiochip_fwd_gpio_get(struct gpiochip_fwd *fwd, unsigned int offset) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_get(gc, offset); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_get, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_get_multiple - Get values for multiple GPIO forwarder lines + * @fwd: GPIO forwarder + * @mask: bit mask array; one bit per line; BITS_PER_LONG bits per word defines + * which lines are to be read + * @bits: bit value array; one bit per line; BITS_PER_LONG bits per word will + * contains the read values for the lines specified by mask + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_gpio_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, + unsigned long *bits) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_get_multiple_locked(gc, mask, bits); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_get_multiple, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_set - Assign value to a GPIO forwarder line. + * @fwd: GPIO forwarder + * @offset: the offset of the line + * @value: value to set + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_gpio_set(struct gpiochip_fwd *fwd, unsigned int offset, int value) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_set(gc, offset, value); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_set, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_set_multiple - Assign values to multiple GPIO forwarder lines + * @fwd: GPIO forwarder + * @mask: bit mask array; one bit per output; BITS_PER_LONG bits per word + * defines which outputs are to be changed + * @bits: bit value array; one bit per output; BITS_PER_LONG bits per word + * defines the values the outputs specified by mask are to be set to + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_gpio_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, + unsigned long *bits) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_set_multiple_locked(gc, mask, bits); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_set_multiple, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_set_config - Set @config for a GPIO forwarder line + * @fwd: GPIO forwarder + * @offset: the offset of the line + * @config: Same packed config format as generic pinconf + * + * Returns: 0 on success, %-ENOTSUPP if the controller doesn't support setting + * the configuration. + */ +int gpiochip_fwd_gpio_set_config(struct gpiochip_fwd *fwd, unsigned int offset, + unsigned long config) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_set_config(gc, offset, config); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_set_config, "GPIO_FORWARDER"); + +/** + * gpiochip_fwd_gpio_to_irq - Return the IRQ corresponding to a GPIO forwarder line + * @fwd: GPIO forwarder + * @offset: the offset of the line + * + * Returns: The Linux IRQ corresponding to the passed line, or an error code in + * case of error. + */ +int gpiochip_fwd_gpio_to_irq(struct gpiochip_fwd *fwd, unsigned int offset) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_to_irq(gc, offset); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_to_irq, "GPIO_FORWARDER"); + +/** + * devm_gpiochip_fwd_alloc - Allocate and initialize a new GPIO forwarder + * @dev: Parent device pointer + * @ngpios: Number of GPIOs in the forwarder + * + * Returns: An opaque object pointer, or an ERR_PTR()-encoded negative error + * code on failure. + */ +struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, + unsigned int ngpios) { struct gpiochip_fwd *fwd; struct gpio_chip *chip; @@ -507,10 +681,18 @@ devm_gpiochip_fwd_alloc(struct device *dev, unsigned int ngpios) return fwd; } +EXPORT_SYMBOL_NS_GPL(devm_gpiochip_fwd_alloc, "GPIO_FORWARDER"); -static int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, - struct gpio_desc *desc, - unsigned int offset) +/** + * gpiochip_fwd_desc_add - Add a GPIO desc in the forwarder + * @fwd: GPIO forwarder + * @desc: GPIO descriptor to register + * @offset: offset for the GPIO in the forwarder + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, + unsigned int offset) { struct gpio_chip *parent = gpiod_to_chip(desc); struct gpio_chip *chip = &fwd->chip; @@ -537,8 +719,15 @@ static int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, return 0; } +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_desc_add, "GPIO_FORWARDER"); -static int gpiochip_fwd_register(struct gpiochip_fwd *fwd) +/** + * gpiochip_fwd_register - Register a GPIO forwarder + * @fwd: GPIO forwarder + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_register(struct gpiochip_fwd *fwd) { struct gpio_chip *chip = &fwd->chip; @@ -549,6 +738,7 @@ static int gpiochip_fwd_register(struct gpiochip_fwd *fwd) return devm_gpiochip_add_data(chip->parent, chip, fwd); } +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_register, "GPIO_FORWARDER"); /** * gpiochip_fwd_create() - Create a new GPIO forwarder diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h new file mode 100644 index 000000000000..e21a1b7b1905 --- /dev/null +++ b/include/linux/gpio/forwarder.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GPIO_FORWARDER_H +#define __LINUX_GPIO_FORWARDER_H + +struct gpio_desc; +struct gpio_chip; +struct gpiochip_fwd; + +struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, + unsigned int ngpios); +int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, + struct gpio_desc *desc, unsigned int offset); +int gpiochip_fwd_register(struct gpiochip_fwd *fwd); + +struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); + +int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, + unsigned int offset); +int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, + unsigned int offset); +int gpiochip_fwd_gpio_direction_output(struct gpiochip_fwd *fwd, + unsigned int offset, + int value); +int gpiochip_fwd_gpio_get(struct gpiochip_fwd *fwd, unsigned int offset); +int gpiochip_fwd_gpio_get_multiple(struct gpiochip_fwd *fwd, + unsigned long *mask, + unsigned long *bits); +int gpiochip_fwd_gpio_set(struct gpiochip_fwd *fwd, unsigned int offset, + int value); +int gpiochip_fwd_gpio_set_multiple(struct gpiochip_fwd *fwd, + unsigned long *mask, + unsigned long *bits); +int gpiochip_fwd_gpio_set_config(struct gpiochip_fwd *fwd, unsigned int offset, + unsigned long config); +int gpiochip_fwd_gpio_to_irq(struct gpiochip_fwd *fwd, unsigned int offset); + +#endif -- cgit v1.2.3 From b31c68fd851e74526ad963362ea205eb97b9a710 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:50 +0200 Subject: gpio: aggregator: handle runtime registration of gpio_desc in gpiochip_fwd Add request() callback to check if the GPIO descriptor was well registered in the gpiochip_fwd before using it. This is done to handle the case where GPIO descriptor is added at runtime in the forwarder. If at least one GPIO descriptor was not added before the forwarder registration, we assume the forwarder can sleep as if a GPIO is added at runtime it may sleep. Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-7-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 63 ++++++++++++++++++++++++++++++++++++++---- include/linux/gpio/forwarder.h | 2 ++ 2 files changed, 59 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index c1952b28b7b7..f0d38d76cf73 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -246,6 +246,7 @@ struct gpiochip_fwd { spinlock_t slock; /* protects tmp[] if !can_sleep */ }; struct gpiochip_fwd_timing *delay_timings; + unsigned long *valid_mask; unsigned long tmp[]; /* values and descs for multiple ops */ }; @@ -254,10 +255,24 @@ struct gpiochip_fwd { #define fwd_tmp_size(ngpios) (BITS_TO_LONGS((ngpios)) + (ngpios)) +static int gpio_fwd_request(struct gpio_chip *chip, unsigned int offset) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + return test_bit(offset, fwd->valid_mask) ? 0 : -ENODEV; +} + static int gpio_fwd_get_direction(struct gpio_chip *chip, unsigned int offset) { struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + /* + * get_direction() is called during gpiochip registration, return + * -ENODEV if there is no GPIO desc for the line. + */ + if (!test_bit(offset, fwd->valid_mask)) + return -ENODEV; + return gpiod_get_direction(fwd->descs[offset]); } @@ -489,6 +504,21 @@ struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd) } EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_get_gpiochip, "GPIO_FORWARDER"); +/** + * gpiochip_fwd_gpio_request - Request a line of the GPIO forwarder + * @fwd: GPIO forwarder + * @offset: the offset of the line to request + * + * Returns: 0 on success, or negative errno on failure. + */ +int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset) +{ + struct gpio_chip *gc = gpiochip_fwd_get_gpiochip(fwd); + + return gpio_fwd_request(gc, offset); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_gpio_request, "GPIO_FORWARDER"); + /** * gpiochip_fwd_gpio_get_direction - Return the current direction of a GPIO forwarder line * @fwd: GPIO forwarder @@ -663,11 +693,16 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, if (!fwd->descs) return ERR_PTR(-ENOMEM); + fwd->valid_mask = devm_bitmap_zalloc(dev, ngpios, GFP_KERNEL); + if (!fwd->valid_mask) + return ERR_PTR(-ENOMEM); + chip = &fwd->chip; chip->label = dev_name(dev); chip->parent = dev; chip->owner = THIS_MODULE; + chip->request = gpio_fwd_request; chip->get_direction = gpio_fwd_get_direction; chip->direction_input = gpio_fwd_direction_input; chip->direction_output = gpio_fwd_direction_output; @@ -694,24 +729,21 @@ EXPORT_SYMBOL_NS_GPL(devm_gpiochip_fwd_alloc, "GPIO_FORWARDER"); int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset) { - struct gpio_chip *parent = gpiod_to_chip(desc); struct gpio_chip *chip = &fwd->chip; if (offset > chip->ngpio) return -EINVAL; + if (test_and_set_bit(offset, fwd->valid_mask)) + return -EEXIST; + /* * If any of the GPIO lines are sleeping, then the entire forwarder * will be sleeping. - * If any of the chips support .set_config(), then the forwarder will - * support setting configs. */ if (gpiod_cansleep(desc)) chip->can_sleep = true; - if (parent && parent->set_config) - chip->set_config = gpio_fwd_set_config; - fwd->descs[offset] = desc; dev_dbg(chip->parent, "%u => gpio %d irq %d\n", offset, @@ -721,6 +753,18 @@ int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, } EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_desc_add, "GPIO_FORWARDER"); +/** + * gpiochip_fwd_desc_free - Remove a GPIO desc from the forwarder + * @fwd: GPIO forwarder + * @offset: offset of GPIO desc to remove + */ +void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset) +{ + if (test_and_clear_bit(offset, fwd->valid_mask)) + gpiod_put(fwd->descs[offset]); +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_desc_free, "GPIO_FORWARDER"); + /** * gpiochip_fwd_register - Register a GPIO forwarder * @fwd: GPIO forwarder @@ -731,6 +775,13 @@ int gpiochip_fwd_register(struct gpiochip_fwd *fwd) { struct gpio_chip *chip = &fwd->chip; + /* + * Some gpio_desc were not registered. They will be registered at runtime + * but we have to suppose they can sleep. + */ + if (!bitmap_full(fwd->valid_mask, chip->ngpio)) + chip->can_sleep = true; + if (chip->can_sleep) mutex_init(&fwd->mlock); else diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h index e21a1b7b1905..45e0190308f0 100644 --- a/include/linux/gpio/forwarder.h +++ b/include/linux/gpio/forwarder.h @@ -10,10 +10,12 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, unsigned int ngpios); int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset); +void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_register(struct gpiochip_fwd *fwd); struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); +int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, -- cgit v1.2.3 From 60e92c1009c7c6abd4a9d0caf33a8cba5d09f67c Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:51 +0200 Subject: gpio: aggregator: add possibility to attach data to the forwarder Add a data pointer to store private data in the forwarder. Reviewed-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-8-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 20 ++++++++++++++++++-- include/linux/gpio/forwarder.h | 4 +++- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index f0d38d76cf73..fb3694d581d1 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -246,6 +246,7 @@ struct gpiochip_fwd { spinlock_t slock; /* protects tmp[] if !can_sleep */ }; struct gpiochip_fwd_timing *delay_timings; + void *data; unsigned long *valid_mask; unsigned long tmp[]; /* values and descs for multiple ops */ }; @@ -504,6 +505,18 @@ struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd) } EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_get_gpiochip, "GPIO_FORWARDER"); +/** + * gpiochip_fwd_get_data - Get driver-private data for the GPIO forwarder + * @fwd: GPIO forwarder + * + * Returns: The driver-private data for the GPIO forwarder + */ +void *gpiochip_fwd_get_data(struct gpiochip_fwd *fwd) +{ + return fwd->data; +} +EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_get_data, "GPIO_FORWARDER"); + /** * gpiochip_fwd_gpio_request - Request a line of the GPIO forwarder * @fwd: GPIO forwarder @@ -768,10 +781,11 @@ EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_desc_free, "GPIO_FORWARDER"); /** * gpiochip_fwd_register - Register a GPIO forwarder * @fwd: GPIO forwarder + * @data: driver-private data associated with this forwarder * * Returns: 0 on success, or negative errno on failure. */ -int gpiochip_fwd_register(struct gpiochip_fwd *fwd) +int gpiochip_fwd_register(struct gpiochip_fwd *fwd, void *data) { struct gpio_chip *chip = &fwd->chip; @@ -787,6 +801,8 @@ int gpiochip_fwd_register(struct gpiochip_fwd *fwd) else spin_lock_init(&fwd->slock); + fwd->data = data; + return devm_gpiochip_add_data(chip->parent, chip, fwd); } EXPORT_SYMBOL_NS_GPL(gpiochip_fwd_register, "GPIO_FORWARDER"); @@ -831,7 +847,7 @@ static struct gpiochip_fwd *gpiochip_fwd_create(struct device *dev, return ERR_PTR(error); } - error = gpiochip_fwd_register(fwd); + error = gpiochip_fwd_register(fwd, NULL); if (error) return ERR_PTR(error); diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h index 45e0190308f0..ee5d8355f735 100644 --- a/include/linux/gpio/forwarder.h +++ b/include/linux/gpio/forwarder.h @@ -11,10 +11,12 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset); void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset); -int gpiochip_fwd_register(struct gpiochip_fwd *fwd); +int gpiochip_fwd_register(struct gpiochip_fwd *fwd, void *data); struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); +void *gpiochip_fwd_get_data(struct gpiochip_fwd *fwd); + int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset); -- cgit v1.2.3 From 53ec9169db1345f04174febb90f88a871fc28d9e Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:52 +0200 Subject: lib/string_choices: Add str_input_output() helper Add str_input_output() helper to return 'input' or 'output' string literal. Also add the inversed variant str_output_input(). Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-9-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index f3ba4f52ff26..a27c87c954ae 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -41,6 +41,12 @@ static inline const char *str_high_low(bool v) } #define str_low_high(v) str_high_low(!(v)) +static inline const char *str_input_output(bool v) +{ + return v ? "input" : "output"; +} +#define str_output_input(v) str_input_output(!(v)) + static inline const char *str_on_off(bool v) { return v ? "on" : "off"; -- cgit v1.2.3 From 0daf35da397b083ea0ea5407196bb6bd210530ec Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 7 Aug 2025 13:13:10 +0530 Subject: soc: qcom: mdt_loader: Remove pas id parameter pas id is not used in qcom_mdt_load_no_init() and it should not be used as it is non-PAS specific function and has no relation to PAS specific mechanism. Reviewed-by: Dikshita Agarwal Acked-by: Jeff Johnson # drivers/net/wireless/ath/ath12k/ahb.c Signed-off-by: Mukesh Ojha Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250807074311.2381713-2-mukesh.ojha@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/media/platform/qcom/venus/firmware.c | 4 ++-- drivers/net/wireless/ath/ath12k/ahb.c | 2 +- drivers/remoteproc/qcom_q6v5_adsp.c | 2 +- drivers/remoteproc/qcom_q6v5_pas.c | 7 +++---- drivers/remoteproc/qcom_q6v5_wcss.c | 2 +- drivers/soc/qcom/mdt_loader.c | 10 ++++------ include/linux/soc/qcom/mdt_loader.h | 7 +++---- 7 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 66a18830e66d..862d0718f694 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -136,8 +136,8 @@ static int venus_load_fw(struct venus_core *core, const char *fwname, ret = qcom_mdt_load(dev, mdt, fwname, VENUS_PAS_ID, mem_va, *mem_phys, *mem_size, NULL); else - ret = qcom_mdt_load_no_init(dev, mdt, fwname, VENUS_PAS_ID, - mem_va, *mem_phys, *mem_size, NULL); + ret = qcom_mdt_load_no_init(dev, mdt, fwname, mem_va, + *mem_phys, *mem_size, NULL); memunmap(mem_va); err_release_fw: diff --git a/drivers/net/wireless/ath/ath12k/ahb.c b/drivers/net/wireless/ath/ath12k/ahb.c index 3b983f4e3268..b30527c402f6 100644 --- a/drivers/net/wireless/ath/ath12k/ahb.c +++ b/drivers/net/wireless/ath/ath12k/ahb.c @@ -414,7 +414,7 @@ static int ath12k_ahb_power_up(struct ath12k_base *ab) goto err_fw2; } - ret = qcom_mdt_load_no_init(dev, fw2, fw2_name, pasid, mem_region, mem_phys, + ret = qcom_mdt_load_no_init(dev, fw2, fw2_name, mem_region, mem_phys, mem_size, &mem_phys); if (ret) { ath12k_err(ab, "Failed to load MDT segments: %d\n", ret); diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c index 94af77baa7a1..e98b7e03162c 100644 --- a/drivers/remoteproc/qcom_q6v5_adsp.c +++ b/drivers/remoteproc/qcom_q6v5_adsp.c @@ -317,7 +317,7 @@ static int adsp_load(struct rproc *rproc, const struct firmware *fw) struct qcom_adsp *adsp = rproc->priv; int ret; - ret = qcom_mdt_load_no_init(adsp->dev, fw, rproc->firmware, 0, + ret = qcom_mdt_load_no_init(adsp->dev, fw, rproc->firmware, adsp->mem_region, adsp->mem_phys, adsp->mem_size, &adsp->mem_reloc); if (ret) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 02e29171cbbe..55a7da801183 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -242,9 +242,8 @@ static int qcom_pas_load(struct rproc *rproc, const struct firmware *fw) goto release_dtb_firmware; ret = qcom_mdt_load_no_init(pas->dev, pas->dtb_firmware, pas->dtb_firmware_name, - pas->dtb_pas_id, pas->dtb_mem_region, - pas->dtb_mem_phys, pas->dtb_mem_size, - &pas->dtb_mem_reloc); + pas->dtb_mem_region, pas->dtb_mem_phys, + pas->dtb_mem_size, &pas->dtb_mem_reloc); if (ret) goto release_dtb_metadata; } @@ -307,7 +306,7 @@ static int qcom_pas_start(struct rproc *rproc) if (ret) goto disable_px_supply; - ret = qcom_mdt_load_no_init(pas->dev, pas->firmware, rproc->firmware, pas->pas_id, + ret = qcom_mdt_load_no_init(pas->dev, pas->firmware, rproc->firmware, pas->mem_region, pas->mem_phys, pas->mem_size, &pas->mem_reloc); if (ret) diff --git a/drivers/remoteproc/qcom_q6v5_wcss.c b/drivers/remoteproc/qcom_q6v5_wcss.c index 93648734a2f2..07c88623f597 100644 --- a/drivers/remoteproc/qcom_q6v5_wcss.c +++ b/drivers/remoteproc/qcom_q6v5_wcss.c @@ -757,7 +757,7 @@ static int q6v5_wcss_load(struct rproc *rproc, const struct firmware *fw) int ret; ret = qcom_mdt_load_no_init(wcss->dev, fw, rproc->firmware, - 0, wcss->mem_region, wcss->mem_phys, + wcss->mem_region, wcss->mem_phys, wcss->mem_size, &wcss->mem_reloc); if (ret) return ret; diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index dfd15d189087..74c415774657 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -331,7 +331,7 @@ static bool qcom_mdt_bins_are_split(const struct firmware *fw, const char *fw_na } static int __qcom_mdt_load(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, void *mem_region, + const char *fw_name, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base) { @@ -458,7 +458,7 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw, if (ret) return ret; - return __qcom_mdt_load(dev, fw, firmware, pas_id, mem_region, mem_phys, + return __qcom_mdt_load(dev, fw, firmware, mem_region, mem_phys, mem_size, reloc_base); } EXPORT_SYMBOL_GPL(qcom_mdt_load); @@ -468,7 +468,6 @@ EXPORT_SYMBOL_GPL(qcom_mdt_load); * @dev: device handle to associate resources with * @fw: firmware object for the mdt file * @firmware: name of the firmware, for construction of segment file names - * @pas_id: PAS identifier * @mem_region: allocated memory region to load firmware into * @mem_phys: physical address of allocated memory region * @mem_size: size of the allocated memory region @@ -477,11 +476,10 @@ EXPORT_SYMBOL_GPL(qcom_mdt_load); * Returns 0 on success, negative errno otherwise. */ int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *firmware, int pas_id, - void *mem_region, phys_addr_t mem_phys, + const char *firmware, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base) { - return __qcom_mdt_load(dev, fw, firmware, pas_id, mem_region, mem_phys, + return __qcom_mdt_load(dev, fw, firmware, mem_region, mem_phys, mem_size, reloc_base); } EXPORT_SYMBOL_GPL(qcom_mdt_load_no_init); diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index 9e8e60421192..8ea8230579a2 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -24,7 +24,7 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw, phys_addr_t *reloc_base); int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, void *mem_region, + const char *fw_name, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base); void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len, @@ -54,9 +54,8 @@ static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, static inline int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, - void *mem_region, phys_addr_t mem_phys, - size_t mem_size, + const char *fw_name, void *mem_region, + phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base) { return -ENODEV; -- cgit v1.2.3 From ce8370e2e62a903e18be7dd0e0be2eee079501e1 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 6 Aug 2025 17:04:07 -0400 Subject: audit: record fanotify event regardless of presence of rules When no audit rules are in place, fanotify event results are unconditionally dropped due to an explicit check for the existence of any audit rules. Given this is a report from another security sub-system, allow it to be recorded regardless of the existence of any audit rules. To test, install and run the fapolicyd daemon with default config. Then as an unprivileged user, create and run a very simple binary that should be denied. Then check for an event with ausearch -m FANOTIFY -ts recent Link: https://issues.redhat.com/browse/RHEL-9065 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index a394614ccd0b..e3f06eba9c6e 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -527,7 +527,7 @@ static inline void audit_log_kern_module(const char *name) static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { - if (!audit_dummy_context()) + if (audit_enabled) __audit_fanotify(response, friar); } -- cgit v1.2.3 From 5816bf4273edb32716a88c796e0b04f0e12962eb Mon Sep 17 00:00:00 2001 From: Blaise Boscaccy Date: Tue, 22 Jul 2025 14:21:34 -0700 Subject: lsm,selinux: Add LSM blob support for BPF objects This patch introduces LSM blob support for BPF maps, programs, and tokens to enable LSM stacking and multiplexing of LSM modules that govern BPF objects. Additionally, the existing BPF hooks used by SELinux have been updated to utilize the new blob infrastructure, removing the assumption of exclusive ownership of the security pointer. Signed-off-by: Blaise Boscaccy [PM: dropped local variable init, style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 3 ++ security/security.c | 86 +++++++++++++++++++++++++++++++++++++-- security/selinux/hooks.c | 56 +++++-------------------- security/selinux/include/objsec.h | 20 +++++++++ 4 files changed, 116 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 090d1d3e19fe..79ec5a2bdcca 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -116,6 +116,9 @@ struct lsm_blob_sizes { int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; int lbs_bdev; + int lbs_bpf_map; + int lbs_bpf_prog; + int lbs_bpf_token; }; /* diff --git a/security/security.c b/security/security.c index a88ebfca3224..ca126b02d2fe 100644 --- a/security/security.c +++ b/security/security.c @@ -283,6 +283,9 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) lsm_set_blob_size(&needed->lbs_xattr_count, &blob_sizes.lbs_xattr_count); lsm_set_blob_size(&needed->lbs_bdev, &blob_sizes.lbs_bdev); + lsm_set_blob_size(&needed->lbs_bpf_map, &blob_sizes.lbs_bpf_map); + lsm_set_blob_size(&needed->lbs_bpf_prog, &blob_sizes.lbs_bpf_prog); + lsm_set_blob_size(&needed->lbs_bpf_token, &blob_sizes.lbs_bpf_token); } /* Prepare LSM for initialization. */ @@ -480,6 +483,9 @@ static void __init ordered_lsm_init(void) init_debug("tun device blob size = %d\n", blob_sizes.lbs_tun_dev); init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count); init_debug("bdev blob size = %d\n", blob_sizes.lbs_bdev); + init_debug("bpf map blob size = %d\n", blob_sizes.lbs_bpf_map); + init_debug("bpf prog blob size = %d\n", blob_sizes.lbs_bpf_prog); + init_debug("bpf token blob size = %d\n", blob_sizes.lbs_bpf_token); /* * Create any kmem_caches needed for blobs @@ -827,6 +833,47 @@ static int lsm_bdev_alloc(struct block_device *bdev) GFP_KERNEL); } +#ifdef CONFIG_BPF_SYSCALL +/** + * lsm_bpf_map_alloc - allocate a composite bpf_map blob + * @map: the bpf_map that needs a blob + * + * Allocate the bpf_map blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_bpf_map_alloc(struct bpf_map *map) +{ + return lsm_blob_alloc(&map->security, blob_sizes.lbs_bpf_map, GFP_KERNEL); +} + +/** + * lsm_bpf_prog_alloc - allocate a composite bpf_prog blob + * @prog: the bpf_prog that needs a blob + * + * Allocate the bpf_prog blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_bpf_prog_alloc(struct bpf_prog *prog) +{ + return lsm_blob_alloc(&prog->aux->security, blob_sizes.lbs_bpf_prog, GFP_KERNEL); +} + +/** + * lsm_bpf_token_alloc - allocate a composite bpf_token blob + * @token: the bpf_token that needs a blob + * + * Allocate the bpf_token blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_bpf_token_alloc(struct bpf_token *token) +{ + return lsm_blob_alloc(&token->security, blob_sizes.lbs_bpf_token, GFP_KERNEL); +} +#endif /* CONFIG_BPF_SYSCALL */ + /** * lsm_early_task - during initialization allocate a composite task blob * @task: the task that needs a blob @@ -5706,7 +5753,16 @@ int security_bpf_prog(struct bpf_prog *prog) int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, struct bpf_token *token, bool kernel) { - return call_int_hook(bpf_map_create, map, attr, token, kernel); + int rc; + + rc = lsm_bpf_map_alloc(map); + if (unlikely(rc)) + return rc; + + rc = call_int_hook(bpf_map_create, map, attr, token, kernel); + if (unlikely(rc)) + security_bpf_map_free(map); + return rc; } /** @@ -5725,7 +5781,16 @@ int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token, bool kernel) { - return call_int_hook(bpf_prog_load, prog, attr, token, kernel); + int rc; + + rc = lsm_bpf_prog_alloc(prog); + if (unlikely(rc)) + return rc; + + rc = call_int_hook(bpf_prog_load, prog, attr, token, kernel); + if (unlikely(rc)) + security_bpf_prog_free(prog); + return rc; } /** @@ -5742,7 +5807,16 @@ int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, const struct path *path) { - return call_int_hook(bpf_token_create, token, attr, path); + int rc; + + rc = lsm_bpf_token_alloc(token); + if (unlikely(rc)) + return rc; + + rc = call_int_hook(bpf_token_create, token, attr, path); + if (unlikely(rc)) + security_bpf_token_free(token); + return rc; } /** @@ -5786,6 +5860,8 @@ int security_bpf_token_capable(const struct bpf_token *token, int cap) void security_bpf_map_free(struct bpf_map *map) { call_void_hook(bpf_map_free, map); + kfree(map->security); + map->security = NULL; } /** @@ -5797,6 +5873,8 @@ void security_bpf_map_free(struct bpf_map *map) void security_bpf_prog_free(struct bpf_prog *prog) { call_void_hook(bpf_prog_free, prog); + kfree(prog->aux->security); + prog->aux->security = NULL; } /** @@ -5808,6 +5886,8 @@ void security_bpf_prog_free(struct bpf_prog *prog) void security_bpf_token_free(struct bpf_token *token) { call_void_hook(bpf_token_free, token); + kfree(token->security); + token->security = NULL; } #endif /* CONFIG_BPF_SYSCALL */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c95a5874bf7d..4da5e792b42e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -7062,14 +7062,14 @@ static int bpf_fd_pass(const struct file *file, u32 sid) if (file->f_op == &bpf_map_fops) { map = file->private_data; - bpfsec = map->security; + bpfsec = selinux_bpf_map_security(map); ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, bpf_map_fmode_to_av(file->f_mode), NULL); if (ret) return ret; } else if (file->f_op == &bpf_prog_fops) { prog = file->private_data; - bpfsec = prog->aux->security; + bpfsec = selinux_bpf_prog_security(prog); ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, BPF__PROG_RUN, NULL); if (ret) @@ -7083,7 +7083,7 @@ static int selinux_bpf_map(struct bpf_map *map, fmode_t fmode) u32 sid = current_sid(); struct bpf_security_struct *bpfsec; - bpfsec = map->security; + bpfsec = selinux_bpf_map_security(map); return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, bpf_map_fmode_to_av(fmode), NULL); } @@ -7093,7 +7093,7 @@ static int selinux_bpf_prog(struct bpf_prog *prog) u32 sid = current_sid(); struct bpf_security_struct *bpfsec; - bpfsec = prog->aux->security; + bpfsec = selinux_bpf_prog_security(prog); return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, BPF__PROG_RUN, NULL); } @@ -7103,69 +7103,33 @@ static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, { struct bpf_security_struct *bpfsec; - bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); - if (!bpfsec) - return -ENOMEM; - + bpfsec = selinux_bpf_map_security(map); bpfsec->sid = current_sid(); - map->security = bpfsec; return 0; } -static void selinux_bpf_map_free(struct bpf_map *map) -{ - struct bpf_security_struct *bpfsec = map->security; - - map->security = NULL; - kfree(bpfsec); -} - static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token, bool kernel) { struct bpf_security_struct *bpfsec; - bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); - if (!bpfsec) - return -ENOMEM; - + bpfsec = selinux_bpf_prog_security(prog); bpfsec->sid = current_sid(); - prog->aux->security = bpfsec; return 0; } -static void selinux_bpf_prog_free(struct bpf_prog *prog) -{ - struct bpf_security_struct *bpfsec = prog->aux->security; - - prog->aux->security = NULL; - kfree(bpfsec); -} - static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, const struct path *path) { struct bpf_security_struct *bpfsec; - bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); - if (!bpfsec) - return -ENOMEM; - + bpfsec = selinux_bpf_token_security(token); bpfsec->sid = current_sid(); - token->security = bpfsec; return 0; } - -static void selinux_bpf_token_free(struct bpf_token *token) -{ - struct bpf_security_struct *bpfsec = token->security; - - token->security = NULL; - kfree(bpfsec); -} #endif struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = { @@ -7183,6 +7147,9 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = { .lbs_xattr_count = SELINUX_INODE_INIT_XATTRS, .lbs_tun_dev = sizeof(struct tun_security_struct), .lbs_ib = sizeof(struct ib_security_struct), + .lbs_bpf_map = sizeof(struct bpf_security_struct), + .lbs_bpf_prog = sizeof(struct bpf_security_struct), + .lbs_bpf_token = sizeof(struct bpf_security_struct), }; #ifdef CONFIG_PERF_EVENTS @@ -7536,9 +7503,6 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(bpf, selinux_bpf), LSM_HOOK_INIT(bpf_map, selinux_bpf_map), LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog), - LSM_HOOK_INIT(bpf_map_free, selinux_bpf_map_free), - LSM_HOOK_INIT(bpf_prog_free, selinux_bpf_prog_free), - LSM_HOOK_INIT(bpf_token_free, selinux_bpf_token_free), #endif #ifdef CONFIG_PERF_EVENTS diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 1d7ac59015a1..2d5139c6d45b 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -26,6 +26,7 @@ #include #include #include +#include #include "flask.h" #include "avc.h" @@ -245,4 +246,23 @@ selinux_perf_event(void *perf_event) return perf_event + selinux_blob_sizes.lbs_perf_event; } +#ifdef CONFIG_BPF_SYSCALL +static inline struct bpf_security_struct * +selinux_bpf_map_security(struct bpf_map *map) +{ + return map->security + selinux_blob_sizes.lbs_bpf_map; +} + +static inline struct bpf_security_struct * +selinux_bpf_prog_security(struct bpf_prog *prog) +{ + return prog->aux->security + selinux_blob_sizes.lbs_bpf_prog; +} + +static inline struct bpf_security_struct * +selinux_bpf_token_security(struct bpf_token *token) +{ + return token->security + selinux_blob_sizes.lbs_bpf_token; +} +#endif /* CONFIG_BPF_SYSCALL */ #endif /* _SELINUX_OBJSEC_H_ */ -- cgit v1.2.3 From fb357dbadbebc7a9ca3c5ef26f6c792b0e8e1278 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 14:24:42 +0200 Subject: fbcon: Add necessary include statements and forward declarations Make the header self contained for including. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250715122643.137027-6-tzimmermann@suse.de --- include/linux/fbcon.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 2382dec6d6ab..81f0e698acbf 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -1,6 +1,13 @@ #ifndef _LINUX_FBCON_H #define _LINUX_FBCON_H +#include + +struct fb_blit_caps; +struct fb_info; +struct fb_var_screeninfo; +struct fb_videomode; + #ifdef CONFIG_FRAMEBUFFER_CONSOLE void __init fb_console_init(void); void __exit fb_console_exit(void); -- cgit v1.2.3 From bea90085dcb0f9a75748e73d723bde557a5ebf1a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 23 Jul 2025 11:21:53 -0400 Subject: dlm: use defines for force values in dlm_release_lockspace Clarify the use of the force parameter by renaming it to "release_option" and adding defines (with descriptions) for each of the accepted values. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- drivers/md/md-cluster.c | 4 ++-- fs/dlm/lockspace.c | 20 +++++++++----------- fs/dlm/user.c | 6 +++--- fs/gfs2/lock_dlm.c | 4 ++-- fs/ocfs2/stack_user.c | 2 +- include/linux/dlm.h | 26 +++++++++++++++++++++++++- 6 files changed, 42 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 5497eaee96e7..6e9a0045f0ff 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -979,7 +979,7 @@ err: lockres_free(cinfo->resync_lockres); lockres_free(cinfo->bitmap_lockres); if (cinfo->lockspace) - dlm_release_lockspace(cinfo->lockspace, 2); + dlm_release_lockspace(cinfo->lockspace, DLM_RELEASE_NORMAL); mddev->cluster_info = NULL; kfree(cinfo); return ret; @@ -1042,7 +1042,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->resync_lockres); lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); - dlm_release_lockspace(cinfo->lockspace, 2); + dlm_release_lockspace(cinfo->lockspace, DLM_RELEASE_NORMAL); kfree(cinfo); return 0; } diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index ee11a70def92..be8dbf482229 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -671,19 +671,20 @@ int dlm_new_user_lockspace(const char *name, const char *cluster, This is because there may be LKBs queued as ASTs that have been unlinked from their RSBs and are pending deletion once the AST has been delivered */ -static int lockspace_busy(struct dlm_ls *ls, int force) +static int lockspace_busy(struct dlm_ls *ls, int release_option) { struct dlm_lkb *lkb; unsigned long id; int rv = 0; read_lock_bh(&ls->ls_lkbxa_lock); - if (force == 0) { + if (release_option == DLM_RELEASE_NO_LOCKS) { xa_for_each(&ls->ls_lkbxa, id, lkb) { rv = 1; break; } - } else if (force == 1) { + } else if (release_option == DLM_RELEASE_UNUSED) { + /* TODO: handle this UNUSED option as NO_LOCKS in later patch */ xa_for_each(&ls->ls_lkbxa, id, lkb) { if (lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV) { @@ -698,11 +699,11 @@ static int lockspace_busy(struct dlm_ls *ls, int force) return rv; } -static int release_lockspace(struct dlm_ls *ls, int force) +static int release_lockspace(struct dlm_ls *ls, int release_option) { int busy, rv; - busy = lockspace_busy(ls, force); + busy = lockspace_busy(ls, release_option); spin_lock_bh(&lslist_lock); if (ls->ls_create_count == 1) { @@ -730,7 +731,8 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_device_deregister(ls); - if (force != 3 && dlm_user_daemon_available()) + if (release_option != DLM_RELEASE_NO_EVENT && + dlm_user_daemon_available()) do_uevent(ls, 0); dlm_recoverd_stop(ls); @@ -782,11 +784,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) * lockspace must continue to function as usual, participating in recoveries, * until this returns. * - * Force has 4 possible values: - * 0 - don't destroy lockspace if it has any LKBs - * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs - * 2 - destroy lockspace regardless of LKBs - * 3 - destroy lockspace as part of a forced shutdown + * See DLM_RELEASE defines for release_option values and their meaning. */ int dlm_release_lockspace(void *lockspace, int force) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 5cb3896be826..51daf4acbe31 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -425,7 +425,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) dlm_put_lockspace(ls); if (error) - dlm_release_lockspace(lockspace, 0); + dlm_release_lockspace(lockspace, DLM_RELEASE_NO_LOCKS); else error = ls->ls_device.minor; @@ -436,7 +436,7 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) { dlm_lockspace_t *lockspace; struct dlm_ls *ls; - int error, force = 0; + int error, force = DLM_RELEASE_NO_LOCKS; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -446,7 +446,7 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) return -ENOENT; if (params->flags & DLM_USER_LSFLG_FORCEFREE) - force = 2; + force = DLM_RELEASE_NORMAL; lockspace = ls; dlm_put_lockspace(ls); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index cee5d199d2d8..aac4dd6d0381 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1400,7 +1400,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) return 0; fail_release: - dlm_release_lockspace(ls->ls_dlm, 2); + dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); fail_free: free_recover_size(ls); fail: @@ -1437,7 +1437,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) /* mounted_lock and control_lock will be purged in dlm recovery */ release: if (ls->ls_dlm) { - dlm_release_lockspace(ls->ls_dlm, 2); + dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); ls->ls_dlm = NULL; } diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 0f045e45fa0c..765105f1ff8a 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -952,7 +952,7 @@ static const struct dlm_lockspace_ops ocfs2_ls_ops = { static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) { version_unlock(conn); - dlm_release_lockspace(conn->cc_lockspace, 2); + dlm_release_lockspace(conn->cc_lockspace, DLM_RELEASE_NORMAL); conn->cc_lockspace = NULL; ocfs2_live_connection_drop(conn->cc_private); conn->cc_private = NULL; diff --git a/include/linux/dlm.h b/include/linux/dlm.h index bacda9898f2b..cc7a36244893 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -87,13 +87,37 @@ int dlm_new_lockspace(const char *name, const char *cluster, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace); +/* + * dlm_release_lockspace() release_option values: + * + * DLM_RELEASE_NO_LOCKS returns -EBUSY if any locks (lkb's) + * exist in the local lockspace. + * + * DLM_RELEASE_UNUSED previous value that is no longer used. + * + * DLM_RELEASE_NORMAL releases the lockspace regardless of any + * locks managed in the local lockspace. + * + * DLM_RELEASE_NO_EVENT release the lockspace regardless of any + * locks managed in the local lockspace, and does not submit + * a leave event to the cluster manager, so other nodes will + * not be notified that the node should be removed from the + * list of lockspace members. + */ +#define DLM_RELEASE_NO_LOCKS 0 +#define DLM_RELEASE_UNUSED 1 +#define DLM_RELEASE_NORMAL 2 +#define DLM_RELEASE_NO_EVENT 3 + /* * dlm_release_lockspace * * Stop a lockspace. + * + * release_option: see DLM_RELEASE values above. */ -int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); +int dlm_release_lockspace(dlm_lockspace_t *lockspace, int release_option); /* * dlm_lock -- cgit v1.2.3 From 6f4f4ca5caf73de5e86329547d4527b3e0c08488 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 23 Jul 2025 11:21:56 -0400 Subject: dlm: add new flag DLM_RELEASE_RECOVER for dlm_lockspace_release When dlm_lockspace_release() is passed DLM_RELEASE_RECOVER, it tells the dlm to handle the release/leave as if the node had failed, i.e. perform recovery steps for a failed node, like recover_slot(). When DLM_RELEASE_RECOVER is set: - dlm_release_lockspace() includes RELEASE_RECOVER=1 in the OFFLINE uevent sent to userspace. - userspace/dlm_controld sends a message to all lockspace members indicating that the subsequent node removal should be handled as if the node had failed. - when dlm_controld on all nodes receives the new message, it sets the release_recover configfs entry to 1 for the node. - when the dlm/kernel next performs recovery and removes the node, it will see that release_recover has been set, and will perform recovery steps for the node as if it had failed, e.g. the recover_slot() callback is called to notify the fs. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 2 +- fs/dlm/member.c | 14 ++++++++++---- include/linux/dlm.h | 5 +++++ 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 6ff666a511c7..d986b7ef153d 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -738,7 +738,7 @@ static int release_lockspace(struct dlm_ls *ls, int release_option) if (release_option != DLM_RELEASE_NO_EVENT && dlm_user_daemon_available()) - do_uevent(ls, 0, 0); + do_uevent(ls, 0, (release_option == DLM_RELEASE_RECOVER)); dlm_recoverd_stop(ls); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 152d2cb16f59..356337102015 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -478,7 +478,8 @@ static void dlm_lsop_recover_prep(struct dlm_ls *ls) ls->ls_ops->recover_prep(ls->ls_ops_arg); } -static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) +static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb, + unsigned int release_recover) { struct dlm_slot slot; uint32_t seq; @@ -495,7 +496,7 @@ static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) error = dlm_comm_seq(memb->nodeid, &seq, false); - if (!error && seq == memb->comm_seq) + if (!release_recover && !error && seq == memb->comm_seq) return; slot.nodeid = memb->nodeid; @@ -552,6 +553,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) struct dlm_member *memb, *safe; struct dlm_config_node *node; int i, error, neg = 0, low = -1; + unsigned int release_recover; /* previously removed members that we've not finished removing need to * count as a negative change so the "neg" recovery steps will happen @@ -572,8 +574,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) if (node && !node->new && !node->gone) continue; + release_recover = 0; + if (node->gone) { - log_rinfo(ls, "remove member %d", memb->nodeid); + release_recover = node->release_recover; + log_rinfo(ls, "remove member %d%s", memb->nodeid, + release_recover ? " (release_recover)" : ""); } else { /* removed and re-added */ log_rinfo(ls, "remove member %d comm_seq %u %u", @@ -584,7 +590,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) list_move(&memb->list, &ls->ls_nodes_gone); remove_remote_member(memb->nodeid); ls->ls_num_nodes--; - dlm_lsop_recover_slot(ls, memb); + dlm_lsop_recover_slot(ls, memb, release_recover); } /* add new members to ls_nodes */ diff --git a/include/linux/dlm.h b/include/linux/dlm.h index cc7a36244893..108eb953eb18 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -103,11 +103,16 @@ int dlm_new_lockspace(const char *name, const char *cluster, * a leave event to the cluster manager, so other nodes will * not be notified that the node should be removed from the * list of lockspace members. + * + * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining + * nodes will handle the removal of the node as if the node + * had failed, e.g. the recover_slot() callback would be used. */ #define DLM_RELEASE_NO_LOCKS 0 #define DLM_RELEASE_UNUSED 1 #define DLM_RELEASE_NORMAL 2 #define DLM_RELEASE_NO_EVENT 3 +#define DLM_RELEASE_RECOVER 4 /* * dlm_release_lockspace -- cgit v1.2.3 From 534b9bdeb4b80d843ca9f924524d4d103ad6605e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 29 Jun 2025 22:52:45 -0700 Subject: Input: tca6416-keypad - remove the driver This input driver predates proper GPIO driver for the chip and now can be replaced with the generic gpio-keys. Remove the driver. Link: https://lore.kernel.org/r/ajfsei3keh4jjasd4lshjicgqixew7bak3cmty3suoliskzgz4@vj3ijycfxy4i Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 18 -- drivers/input/keyboard/Makefile | 1 - drivers/input/keyboard/tca6416-keypad.c | 305 -------------------------------- include/linux/tca6416_keypad.h | 30 ---- 4 files changed, 354 deletions(-) delete mode 100644 drivers/input/keyboard/tca6416-keypad.c delete mode 100644 include/linux/tca6416_keypad.h (limited to 'include/linux') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 7c4f309a4cb6..f3474026d192 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -262,24 +262,6 @@ config KEYBOARD_GPIO_POLLED To compile this driver as a module, choose M here: the module will be called gpio_keys_polled. -config KEYBOARD_TCA6416 - tristate "TCA6416/TCA6408A Keypad Support" - depends on I2C - help - This driver implements basic keypad functionality - for keys connected through TCA6416/TCA6408A IO expanders. - - Say Y here if your device has keys connected to - TCA6416/TCA6408A IO expander. Your board-specific setup logic - must also provide pin-mask details(of which TCA6416 pins - are used for keypad). - - If enabled the entire TCA6416 device will be managed through - this driver. - - To compile this driver as a module, choose M here: the - module will be called tca6416_keypad. - config KEYBOARD_TCA8418 tristate "TCA8418 Keypad Support" depends on I2C diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 8bc20ab2b103..c0134da14580 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_KEYBOARD_EP93XX) += ep93xx_keypad.o obj-$(CONFIG_KEYBOARD_GOLDFISH_EVENTS) += goldfish_events.o obj-$(CONFIG_KEYBOARD_GPIO) += gpio_keys.o obj-$(CONFIG_KEYBOARD_GPIO_POLLED) += gpio_keys_polled.o -obj-$(CONFIG_KEYBOARD_TCA6416) += tca6416-keypad.o obj-$(CONFIG_KEYBOARD_TCA8418) += tca8418_keypad.o obj-$(CONFIG_KEYBOARD_HIL) += hil_kbd.o obj-$(CONFIG_KEYBOARD_HIL_OLD) += hilkbd.o diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c deleted file mode 100644 index fbc674d7b9f0..000000000000 --- a/drivers/input/keyboard/tca6416-keypad.c +++ /dev/null @@ -1,305 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Driver for keys on TCA6416 I2C IO expander - * - * Copyright (C) 2010 Texas Instruments - * - * Author : Sriramakrishnan.A.G. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define TCA6416_INPUT 0 -#define TCA6416_OUTPUT 1 -#define TCA6416_INVERT 2 -#define TCA6416_DIRECTION 3 - -#define TCA6416_POLL_INTERVAL 100 /* msec */ - -static const struct i2c_device_id tca6416_id[] = { - { "tca6416-keys", 16, }, - { "tca6408-keys", 8, }, - { } -}; -MODULE_DEVICE_TABLE(i2c, tca6416_id); - -struct tca6416_keypad_chip { - uint16_t reg_output; - uint16_t reg_direction; - uint16_t reg_input; - - struct i2c_client *client; - struct input_dev *input; - int io_size; - u16 pinmask; - bool use_polling; - struct tca6416_button buttons[]; -}; - -static int tca6416_write_reg(struct tca6416_keypad_chip *chip, int reg, u16 val) -{ - int error; - - error = chip->io_size > 8 ? - i2c_smbus_write_word_data(chip->client, reg << 1, val) : - i2c_smbus_write_byte_data(chip->client, reg, val); - if (error < 0) { - dev_err(&chip->client->dev, - "%s failed, reg: %d, val: %d, error: %d\n", - __func__, reg, val, error); - return error; - } - - return 0; -} - -static int tca6416_read_reg(struct tca6416_keypad_chip *chip, int reg, u16 *val) -{ - int retval; - - retval = chip->io_size > 8 ? - i2c_smbus_read_word_data(chip->client, reg << 1) : - i2c_smbus_read_byte_data(chip->client, reg); - if (retval < 0) { - dev_err(&chip->client->dev, "%s failed, reg: %d, error: %d\n", - __func__, reg, retval); - return retval; - } - - *val = (u16)retval; - return 0; -} - -static void tca6416_keys_scan(struct input_dev *input) -{ - struct tca6416_keypad_chip *chip = input_get_drvdata(input); - u16 reg_val, val; - int error, i, pin_index; - - error = tca6416_read_reg(chip, TCA6416_INPUT, ®_val); - if (error) - return; - - reg_val &= chip->pinmask; - - /* Figure out which lines have changed */ - val = reg_val ^ chip->reg_input; - chip->reg_input = reg_val; - - for (i = 0, pin_index = 0; i < 16; i++) { - if (val & (1 << i)) { - struct tca6416_button *button = &chip->buttons[pin_index]; - unsigned int type = button->type ?: EV_KEY; - int state = ((reg_val & (1 << i)) ? 1 : 0) - ^ button->active_low; - - input_event(input, type, button->code, !!state); - input_sync(input); - } - - if (chip->pinmask & (1 << i)) - pin_index++; - } -} - -/* - * This is threaded IRQ handler and this can (and will) sleep. - */ -static irqreturn_t tca6416_keys_isr(int irq, void *dev_id) -{ - tca6416_keys_scan(dev_id); - - return IRQ_HANDLED; -} - -static int tca6416_keys_open(struct input_dev *dev) -{ - struct tca6416_keypad_chip *chip = input_get_drvdata(dev); - - if (!chip->use_polling) { - /* Get initial device state in case it has switches */ - tca6416_keys_scan(dev); - enable_irq(chip->client->irq); - } - - return 0; -} - -static void tca6416_keys_close(struct input_dev *dev) -{ - struct tca6416_keypad_chip *chip = input_get_drvdata(dev); - - if (!chip->use_polling) - disable_irq(chip->client->irq); -} - -static int tca6416_setup_registers(struct tca6416_keypad_chip *chip) -{ - int error; - - error = tca6416_read_reg(chip, TCA6416_OUTPUT, &chip->reg_output); - if (error) - return error; - - error = tca6416_read_reg(chip, TCA6416_DIRECTION, &chip->reg_direction); - if (error) - return error; - - /* ensure that keypad pins are set to input */ - error = tca6416_write_reg(chip, TCA6416_DIRECTION, - chip->reg_direction | chip->pinmask); - if (error) - return error; - - error = tca6416_read_reg(chip, TCA6416_DIRECTION, &chip->reg_direction); - if (error) - return error; - - error = tca6416_read_reg(chip, TCA6416_INPUT, &chip->reg_input); - if (error) - return error; - - chip->reg_input &= chip->pinmask; - - return 0; -} - -static int tca6416_keypad_probe(struct i2c_client *client) -{ - const struct i2c_device_id *id = i2c_client_get_device_id(client); - struct tca6416_keys_platform_data *pdata; - struct tca6416_keypad_chip *chip; - struct input_dev *input; - int error; - int i; - - /* Check functionality */ - if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) { - dev_err(&client->dev, "%s adapter not supported\n", - dev_driver_string(&client->adapter->dev)); - return -ENODEV; - } - - pdata = dev_get_platdata(&client->dev); - if (!pdata) { - dev_dbg(&client->dev, "no platform data\n"); - return -EINVAL; - } - - chip = devm_kzalloc(&client->dev, - struct_size(chip, buttons, pdata->nbuttons), - GFP_KERNEL); - if (!chip) - return -ENOMEM; - - input = devm_input_allocate_device(&client->dev); - if (!input) - return -ENOMEM; - - chip->client = client; - chip->input = input; - chip->io_size = id->driver_data; - chip->pinmask = pdata->pinmask; - chip->use_polling = pdata->use_polling; - - input->phys = "tca6416-keys/input0"; - input->name = client->name; - - input->open = tca6416_keys_open; - input->close = tca6416_keys_close; - - input->id.bustype = BUS_HOST; - input->id.vendor = 0x0001; - input->id.product = 0x0001; - input->id.version = 0x0100; - - /* Enable auto repeat feature of Linux input subsystem */ - if (pdata->rep) - __set_bit(EV_REP, input->evbit); - - for (i = 0; i < pdata->nbuttons; i++) { - unsigned int type; - - chip->buttons[i] = pdata->buttons[i]; - type = (pdata->buttons[i].type) ?: EV_KEY; - input_set_capability(input, type, pdata->buttons[i].code); - } - - input_set_drvdata(input, chip); - - /* - * Initialize cached registers from their original values. - * we can't share this chip with another i2c master. - */ - error = tca6416_setup_registers(chip); - if (error) - return error; - - if (chip->use_polling) { - error = input_setup_polling(input, tca6416_keys_scan); - if (error) { - dev_err(&client->dev, "Failed to setup polling\n"); - return error; - } - - input_set_poll_interval(input, TCA6416_POLL_INTERVAL); - } else { - error = devm_request_threaded_irq(&client->dev, client->irq, - NULL, tca6416_keys_isr, - IRQF_TRIGGER_FALLING | - IRQF_ONESHOT | - IRQF_NO_AUTOEN, - "tca6416-keypad", input); - if (error) { - dev_dbg(&client->dev, - "Unable to claim irq %d; error %d\n", - client->irq, error); - return error; - } - } - - error = input_register_device(input); - if (error) { - dev_dbg(&client->dev, - "Unable to register input device, error: %d\n", error); - return error; - } - - i2c_set_clientdata(client, chip); - - return 0; -} - -static struct i2c_driver tca6416_keypad_driver = { - .driver = { - .name = "tca6416-keypad", - }, - .probe = tca6416_keypad_probe, - .id_table = tca6416_id, -}; - -static int __init tca6416_keypad_init(void) -{ - return i2c_add_driver(&tca6416_keypad_driver); -} - -subsys_initcall(tca6416_keypad_init); - -static void __exit tca6416_keypad_exit(void) -{ - i2c_del_driver(&tca6416_keypad_driver); -} -module_exit(tca6416_keypad_exit); - -MODULE_AUTHOR("Sriramakrishnan "); -MODULE_DESCRIPTION("Keypad driver over tca6416 IO expander"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h deleted file mode 100644 index 5cf6f6f82aa7..000000000000 --- a/include/linux/tca6416_keypad.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tca6416 keypad platform support - * - * Copyright (C) 2010 Texas Instruments - * - * Author: Sriramakrishnan - */ - -#ifndef _TCA6416_KEYS_H -#define _TCA6416_KEYS_H - -#include - -struct tca6416_button { - /* Configuration parameters */ - int code; /* input event code (KEY_*, SW_*) */ - int active_low; - int type; /* input event type (EV_KEY, EV_SW) */ -}; - -struct tca6416_keys_platform_data { - struct tca6416_button *buttons; - int nbuttons; - unsigned int rep:1; /* enable input subsystem auto repeat */ - uint16_t pinmask; - uint16_t invert; - int use_polling; /* use polling if Interrupt is not connected*/ -}; -#endif -- cgit v1.2.3 From c93c59baa5ab57e94b874000cec56e26611b7a23 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 11 Aug 2025 20:58:20 +0200 Subject: bpf: Tidy verifier bug message Yonghong noticed that error messages for potential verifier bugs often have a '(1)' at the end. This is happening because verifier_bug_if(cond, env, fmt, args...) prints "(" #cond ")\n" as part of the message and verifier_bug() is defined as: #define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) Hence, verifier_bug() always ends up displaying '(1)'. This small patch fixes it by having verifier_bug_if conditionally call verifier_bug instead of the other way around. Fixes: 1cb0f56d9618 ("bpf: WARN_ONCE on verifier bugs") Reported-by: Yonghong Song Signed-off-by: Paul Chaignon Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/aJo9THBrzo8jFXsh@mail.gmail.com --- include/linux/bpf_verifier.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c823f8efe3ed..020de62bd09c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -875,13 +875,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, #define verifier_bug_if(cond, env, fmt, args...) \ ({ \ bool __cond = (cond); \ - if (unlikely(__cond)) { \ - BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \ - bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \ - } \ + if (unlikely(__cond)) \ + verifier_bug(env, fmt " (" #cond ")", ##args); \ (__cond); \ }) -#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) +#define verifier_bug(env, fmt, args...) \ + ({ \ + BPF_WARN_ONCE(1, "verifier bug: " fmt "\n", ##args); \ + bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \ + }) static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { -- cgit v1.2.3 From 07bbbfe7addf5b032e04f3c38f0b183d067a3f0d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 11 Aug 2025 19:50:43 +0100 Subject: net: stmmac: add suspend()/resume() platform ops Add suspend/resume platform operations, which, when populated, override the init/exit platform operations when we suspend and resume. These suspend()/resume() methods are called by core code, and thus are designed to support any struct device, not just platform devices. This allows them to be used by the PCI drivers we have. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1ulXbX-008gqZ-Bb@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++++++ drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 12 ++++++++---- include/linux/stmmac.h | 2 ++ 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f1abf4242cd2..2da4f7bb2899 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7879,6 +7879,9 @@ int stmmac_suspend(struct device *dev) if (stmmac_fpe_supported(priv)) ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); + if (priv->plat->suspend) + return priv->plat->suspend(dev, priv->plat->bsp_priv); + return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); @@ -7931,6 +7934,12 @@ int stmmac_resume(struct device *dev) struct stmmac_priv *priv = netdev_priv(ndev); int ret; + if (priv->plat->resume) { + ret = priv->plat->resume(dev, priv->plat->bsp_priv); + if (ret) + return ret; + } + if (!netif_running(ndev)) return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 030fcf1b5993..21df052eeed0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -901,7 +901,9 @@ static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - stmmac_pltfr_exit(pdev, priv->plat); + + if (!priv->plat->suspend) + stmmac_pltfr_exit(pdev, priv->plat); return ret; } @@ -920,9 +922,11 @@ static int __maybe_unused stmmac_pltfr_resume(struct device *dev) struct platform_device *pdev = to_platform_device(dev); int ret; - ret = stmmac_pltfr_init(pdev, priv->plat); - if (ret) - return ret; + if (!priv->plat->resume) { + ret = stmmac_pltfr_init(pdev, priv->plat); + if (ret) + return ret; + } return stmmac_resume(dev); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 26ddf95d23f9..22c24dacbc65 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -248,6 +248,8 @@ struct plat_stmmacenet_data { void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + int (*suspend)(struct device *dev, void *priv); + int (*resume)(struct device *dev, void *priv); struct mac_device_info *(*setup)(void *priv); int (*clks_config)(void *priv, bool enabled); int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, -- cgit v1.2.3 From b3ef7bdec66fb1813e865fd39d179a93cefd2015 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 11 Aug 2025 17:31:42 +0200 Subject: net: airoha: Add airoha_offload.h header Move NPU definitions to airoha_offload.h in include/linux/soc/airoha/ in order to allow the MT76 driver to access the callback definitions. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250811-airoha-en7581-wlan-offlaod-v7-7-58823603bb4e@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 2 +- drivers/net/ethernet/airoha/airoha_npu.h | 103 ------------ drivers/net/ethernet/airoha/airoha_ppe.c | 2 +- include/linux/soc/airoha/airoha_offload.h | 260 ++++++++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 105 deletions(-) delete mode 100644 drivers/net/ethernet/airoha/airoha_npu.h create mode 100644 include/linux/soc/airoha/airoha_offload.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 66a8a992dbf2..1a6b191ae0b0 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -11,9 +11,9 @@ #include #include #include +#include #include "airoha_eth.h" -#include "airoha_npu.h" #define NPU_EN7581_FIRMWARE_DATA "airoha/en7581_npu_data.bin" #define NPU_EN7581_FIRMWARE_RV32 "airoha/en7581_npu_rv32.bin" diff --git a/drivers/net/ethernet/airoha/airoha_npu.h b/drivers/net/ethernet/airoha/airoha_npu.h deleted file mode 100644 index a448c74208a9..000000000000 --- a/drivers/net/ethernet/airoha/airoha_npu.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2025 AIROHA Inc - * Author: Lorenzo Bianconi - */ - -#define NPU_NUM_CORES 8 -#define NPU_NUM_IRQ 6 - -enum airoha_npu_wlan_set_cmd { - WLAN_FUNC_SET_WAIT_PCIE_ADDR, - WLAN_FUNC_SET_WAIT_DESC, - WLAN_FUNC_SET_WAIT_NPU_INIT_DONE, - WLAN_FUNC_SET_WAIT_TRAN_TO_CPU, - WLAN_FUNC_SET_WAIT_BA_WIN_SIZE, - WLAN_FUNC_SET_WAIT_DRIVER_MODEL, - WLAN_FUNC_SET_WAIT_DEL_STA, - WLAN_FUNC_SET_WAIT_DRAM_BA_NODE_ADDR, - WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR, - WLAN_FUNC_SET_WAIT_IS_TEST_NOBA, - WLAN_FUNC_SET_WAIT_FLUSHONE_TIMEOUT, - WLAN_FUNC_SET_WAIT_FLUSHALL_TIMEOUT, - WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU, - WLAN_FUNC_SET_WAIT_PCIE_STATE, - WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, - WLAN_FUNC_SET_WAIT_ERROR_RETRY_TIMES, - WLAN_FUNC_SET_WAIT_BAR_INFO, - WLAN_FUNC_SET_WAIT_FAST_FLAG, - WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU, - WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, - WLAN_FUNC_SET_WAIT_TX_DESC_HW_BASE, - WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, - WLAN_FUNC_SET_WAIT_RX_RING_FOR_TXDONE_HW_BASE, - WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR, - WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, - WLAN_FUNC_SET_WAIT_INODE_DEBUG_FLAG, - WLAN_FUNC_SET_WAIT_INODE_HW_CFG_INFO, - WLAN_FUNC_SET_WAIT_INODE_STOP_ACTION, - WLAN_FUNC_SET_WAIT_INODE_PCIE_SWAP, - WLAN_FUNC_SET_WAIT_RATELIMIT_CTRL, - WLAN_FUNC_SET_WAIT_HWNAT_INIT, - WLAN_FUNC_SET_WAIT_ARHT_CHIP_INFO, - WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR, - WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE, -}; - -enum airoha_npu_wlan_get_cmd { - WLAN_FUNC_GET_WAIT_NPU_INFO, - WLAN_FUNC_GET_WAIT_LAST_RATE, - WLAN_FUNC_GET_WAIT_COUNTER, - WLAN_FUNC_GET_WAIT_DBG_COUNTER, - WLAN_FUNC_GET_WAIT_RXDESC_BASE, - WLAN_FUNC_GET_WAIT_WCID_DBG_COUNTER, - WLAN_FUNC_GET_WAIT_DMA_ADDR, - WLAN_FUNC_GET_WAIT_RING_SIZE, - WLAN_FUNC_GET_WAIT_NPU_SUPPORT_MAP, - WLAN_FUNC_GET_WAIT_MDC_LOCK_ADDRESS, - WLAN_FUNC_GET_WAIT_NPU_VERSION, -}; - -struct airoha_npu { - struct device *dev; - struct regmap *regmap; - - struct airoha_npu_core { - struct airoha_npu *npu; - /* protect concurrent npu memory accesses */ - spinlock_t lock; - struct work_struct wdt_work; - } cores[NPU_NUM_CORES]; - - int irqs[NPU_NUM_IRQ]; - - struct airoha_foe_stats __iomem *stats; - - struct { - int (*ppe_init)(struct airoha_npu *npu); - int (*ppe_deinit)(struct airoha_npu *npu); - int (*ppe_flush_sram_entries)(struct airoha_npu *npu, - dma_addr_t foe_addr, - int sram_num_entries); - int (*ppe_foe_commit_entry)(struct airoha_npu *npu, - dma_addr_t foe_addr, - u32 entry_size, u32 hash, - bool ppe2); - int (*wlan_init_reserved_memory)(struct airoha_npu *npu); - int (*wlan_send_msg)(struct airoha_npu *npu, int ifindex, - enum airoha_npu_wlan_set_cmd func_id, - void *data, int data_len, gfp_t gfp); - int (*wlan_get_msg)(struct airoha_npu *npu, int ifindex, - enum airoha_npu_wlan_get_cmd func_id, - void *data, int data_len, gfp_t gfp); - u32 (*wlan_get_queue_addr)(struct airoha_npu *npu, int qid, - bool xmit); - void (*wlan_set_irq_status)(struct airoha_npu *npu, u32 val); - u32 (*wlan_get_irq_status)(struct airoha_npu *npu, int q); - void (*wlan_enable_irq)(struct airoha_npu *npu, int q); - void (*wlan_disable_irq)(struct airoha_npu *npu, int q); - } ops; -}; - -struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); -void airoha_npu_put(struct airoha_npu *npu); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 47411d2cbd28..82163392332c 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -7,10 +7,10 @@ #include #include #include +#include #include #include -#include "airoha_npu.h" #include "airoha_regs.h" #include "airoha_eth.h" diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h new file mode 100644 index 000000000000..117c63c2448d --- /dev/null +++ b/include/linux/soc/airoha/airoha_offload.h @@ -0,0 +1,260 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 AIROHA Inc + * Author: Lorenzo Bianconi + */ +#ifndef AIROHA_OFFLOAD_H +#define AIROHA_OFFLOAD_H + +#include +#include + +#define NPU_NUM_CORES 8 +#define NPU_NUM_IRQ 6 +#define NPU_RX0_DESC_NUM 512 +#define NPU_RX1_DESC_NUM 512 + +/* CTRL */ +#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) +#define NPU_RX_DMA_DESC_DONE_MASK BIT(0) +/* INFO */ +#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28) +#define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26) +#define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21) +#define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16) +#define NPU_RX_DMA_FOE_ID_MASK GENMASK(15, 0) +/* DATA */ +#define NPU_RX_DMA_SID_MASK GENMASK(31, 16) +#define NPU_RX_DMA_FRAG_TYPE_MASK GENMASK(15, 14) +#define NPU_RX_DMA_PRIORITY_MASK GENMASK(13, 10) +#define NPU_RX_DMA_RADIO_ID_MASK GENMASK(9, 6) +#define NPU_RX_DMA_VAP_ID_MASK GENMASK(5, 2) +#define NPU_RX_DMA_FRAME_TYPE_MASK GENMASK(1, 0) + +struct airoha_npu_rx_dma_desc { + u32 ctrl; + u32 info; + u32 data; + u32 addr; + u64 rsv; +} __packed; + +/* CTRL */ +#define NPU_TX_DMA_DESC_SCHED_MASK BIT(31) +#define NPU_TX_DMA_DESC_LEN_MASK GENMASK(30, 18) +#define NPU_TX_DMA_DESC_VEND_LEN_MASK GENMASK(17, 1) +#define NPU_TX_DMA_DESC_DONE_MASK BIT(0) + +#define NPU_TXWI_LEN 192 + +struct airoha_npu_tx_dma_desc { + u32 ctrl; + u32 addr; + u64 rsv; + u8 txwi[NPU_TXWI_LEN]; +} __packed; + +enum airoha_npu_wlan_set_cmd { + WLAN_FUNC_SET_WAIT_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_DESC, + WLAN_FUNC_SET_WAIT_NPU_INIT_DONE, + WLAN_FUNC_SET_WAIT_TRAN_TO_CPU, + WLAN_FUNC_SET_WAIT_BA_WIN_SIZE, + WLAN_FUNC_SET_WAIT_DRIVER_MODEL, + WLAN_FUNC_SET_WAIT_DEL_STA, + WLAN_FUNC_SET_WAIT_DRAM_BA_NODE_ADDR, + WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_IS_TEST_NOBA, + WLAN_FUNC_SET_WAIT_FLUSHONE_TIMEOUT, + WLAN_FUNC_SET_WAIT_FLUSHALL_TIMEOUT, + WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU, + WLAN_FUNC_SET_WAIT_PCIE_STATE, + WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, + WLAN_FUNC_SET_WAIT_ERROR_RETRY_TIMES, + WLAN_FUNC_SET_WAIT_BAR_INFO, + WLAN_FUNC_SET_WAIT_FAST_FLAG, + WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU, + WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_TX_DESC_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, + WLAN_FUNC_SET_WAIT_RX_RING_FOR_TXDONE_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, + WLAN_FUNC_SET_WAIT_INODE_DEBUG_FLAG, + WLAN_FUNC_SET_WAIT_INODE_HW_CFG_INFO, + WLAN_FUNC_SET_WAIT_INODE_STOP_ACTION, + WLAN_FUNC_SET_WAIT_INODE_PCIE_SWAP, + WLAN_FUNC_SET_WAIT_RATELIMIT_CTRL, + WLAN_FUNC_SET_WAIT_HWNAT_INIT, + WLAN_FUNC_SET_WAIT_ARHT_CHIP_INFO, + WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR, + WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE, +}; + +enum airoha_npu_wlan_get_cmd { + WLAN_FUNC_GET_WAIT_NPU_INFO, + WLAN_FUNC_GET_WAIT_LAST_RATE, + WLAN_FUNC_GET_WAIT_COUNTER, + WLAN_FUNC_GET_WAIT_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_RXDESC_BASE, + WLAN_FUNC_GET_WAIT_WCID_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_DMA_ADDR, + WLAN_FUNC_GET_WAIT_RING_SIZE, + WLAN_FUNC_GET_WAIT_NPU_SUPPORT_MAP, + WLAN_FUNC_GET_WAIT_MDC_LOCK_ADDRESS, + WLAN_FUNC_GET_WAIT_NPU_VERSION, +}; + +struct airoha_npu { +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) + struct device *dev; + struct regmap *regmap; + + struct airoha_npu_core { + struct airoha_npu *npu; + /* protect concurrent npu memory accesses */ + spinlock_t lock; + struct work_struct wdt_work; + } cores[NPU_NUM_CORES]; + + int irqs[NPU_NUM_IRQ]; + + struct airoha_foe_stats __iomem *stats; + + struct { + int (*ppe_init)(struct airoha_npu *npu); + int (*ppe_deinit)(struct airoha_npu *npu); + int (*ppe_flush_sram_entries)(struct airoha_npu *npu, + dma_addr_t foe_addr, + int sram_num_entries); + int (*ppe_foe_commit_entry)(struct airoha_npu *npu, + dma_addr_t foe_addr, + u32 entry_size, u32 hash, + bool ppe2); + int (*wlan_init_reserved_memory)(struct airoha_npu *npu); + int (*wlan_send_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_set_cmd func_id, + void *data, int data_len, gfp_t gfp); + int (*wlan_get_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd func_id, + void *data, int data_len, gfp_t gfp); + u32 (*wlan_get_queue_addr)(struct airoha_npu *npu, int qid, + bool xmit); + void (*wlan_set_irq_status)(struct airoha_npu *npu, u32 val); + u32 (*wlan_get_irq_status)(struct airoha_npu *npu, int q); + void (*wlan_enable_irq)(struct airoha_npu *npu, int q); + void (*wlan_disable_irq)(struct airoha_npu *npu, int q); + } ops; +#endif +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) +struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); +void airoha_npu_put(struct airoha_npu *npu); + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return npu->ops.wlan_init_reserved_memory(npu); +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_send_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_get_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return npu->ops.wlan_get_queue_addr(npu, qid, xmit); +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ + npu->ops.wlan_set_irq_status(npu, val); +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, int q) +{ + return npu->ops.wlan_get_irq_status(npu, q); +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_enable_irq(npu, q); +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_disable_irq(npu, q); +} +#else +static inline struct airoha_npu *airoha_npu_get(struct device *dev, + dma_addr_t *foe_stats_addr) +{ + return NULL; +} + +static inline void airoha_npu_put(struct airoha_npu *npu) +{ +} + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return 0; +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, + int q) +{ + return 0; +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ +} +#endif + +#endif /* AIROHA_OFFLOAD_H */ -- cgit v1.2.3 From 3eb50369c09efb0f668a7f568a7e6f7cf4194cde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 May 2024 23:22:01 -0400 Subject: scsi: switch ->bios_param() to passing gendisk Instances are passed struct block_device *bdev argument; the only thing it is used for (if it's used in the first place) is bdev->bd_disk. Might as well pass that in the first place... Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Al Viro --- Documentation/scsi/scsi_mid_low_api.rst | 4 ++-- drivers/ata/libata-scsi.c | 4 ++-- drivers/message/fusion/mptscsih.c | 2 +- drivers/message/fusion/mptscsih.h | 2 +- drivers/scsi/3w-9xxx.c | 2 +- drivers/scsi/3w-sas.c | 2 +- drivers/scsi/3w-xxxx.c | 2 +- drivers/scsi/BusLogic.c | 4 ++-- drivers/scsi/BusLogic.h | 2 +- drivers/scsi/aacraid/linit.c | 6 +++--- drivers/scsi/advansys.c | 2 +- drivers/scsi/aha152x.c | 4 ++-- drivers/scsi/aha1542.c | 2 +- drivers/scsi/aha1740.c | 2 +- drivers/scsi/aic7xxx/aic79xx_osm.c | 4 ++-- drivers/scsi/aic7xxx/aic7xxx_osm.c | 4 ++-- drivers/scsi/arcmsr/arcmsr_hba.c | 6 +++--- drivers/scsi/atp870u.c | 2 +- drivers/scsi/fdomain.c | 4 ++-- drivers/scsi/imm.c | 2 +- drivers/scsi/initio.c | 4 ++-- drivers/scsi/ipr.c | 8 ++++---- drivers/scsi/ips.c | 2 +- drivers/scsi/ips.h | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 2 +- drivers/scsi/megaraid.c | 4 ++-- drivers/scsi/megaraid.h | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- drivers/scsi/mpi3mr/mpi3mr_os.c | 4 ++-- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- drivers/scsi/mvumi.c | 2 +- drivers/scsi/myrb.c | 2 +- drivers/scsi/pcmcia/sym53c500_cs.c | 2 +- drivers/scsi/ppa.c | 2 +- drivers/scsi/qla1280.c | 2 +- drivers/scsi/qlogicfas408.c | 2 +- drivers/scsi/qlogicfas408.h | 2 +- drivers/scsi/scsicam.c | 6 +++--- drivers/scsi/sd.c | 4 ++-- drivers/scsi/stex.c | 2 +- drivers/scsi/storvsc_drv.c | 2 +- drivers/scsi/wd719x.c | 2 +- include/linux/libata.h | 2 +- include/scsi/libsas.h | 2 +- include/scsi/scsi_host.h | 2 +- include/scsi/scsicam.h | 2 +- 46 files changed, 68 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/Documentation/scsi/scsi_mid_low_api.rst b/Documentation/scsi/scsi_mid_low_api.rst index 96c8230d638e..634f5c28a849 100644 --- a/Documentation/scsi/scsi_mid_low_api.rst +++ b/Documentation/scsi/scsi_mid_low_api.rst @@ -623,7 +623,7 @@ Details:: * bios_param - fetch head, sector, cylinder info for a disk * @sdev: pointer to scsi device context (defined in * include/scsi/scsi_device.h) - * @bdev: pointer to block device context (defined in fs.h) + * @disk: pointer to gendisk (defined in blkdev.h) * @capacity: device size (in 512 byte sectors) * @params: three element array to place output: * params[0] number of heads (max 255) @@ -643,7 +643,7 @@ Details:: * * Optionally defined in: LLD **/ - int bios_param(struct scsi_device * sdev, struct block_device *bdev, + int bios_param(struct scsi_device * sdev, struct gendisk *disk, sector_t capacity, int params[3]) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 57f674f51b0c..3603b430724b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -351,7 +351,7 @@ EXPORT_SYMBOL_GPL(ata_common_sdev_groups); /** * ata_std_bios_param - generic bios head/sector/cylinder calculator used by sd. * @sdev: SCSI device for which BIOS geometry is to be determined - * @bdev: block device associated with @sdev + * @unused: gendisk associated with @sdev * @capacity: capacity of SCSI device * @geom: location to which geometry will be output * @@ -366,7 +366,7 @@ EXPORT_SYMBOL_GPL(ata_common_sdev_groups); * RETURNS: * Zero. */ -int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, +int ata_std_bios_param(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { geom[0] = 255; diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 3a64dc7a7e27..3304f8824cf7 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -2074,7 +2074,7 @@ mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, * This is anyones guess quite frankly. */ int -mptscsih_bios_param(struct scsi_device * sdev, struct block_device *bdev, +mptscsih_bios_param(struct scsi_device * sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads; diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h index 8c2bb2331fc1..f9678d48100c 100644 --- a/drivers/message/fusion/mptscsih.h +++ b/drivers/message/fusion/mptscsih.h @@ -123,7 +123,7 @@ extern int mptscsih_abort(struct scsi_cmnd * SCpnt); extern int mptscsih_dev_reset(struct scsi_cmnd * SCpnt); extern int mptscsih_bus_reset(struct scsi_cmnd * SCpnt); extern int mptscsih_host_reset(struct scsi_cmnd *SCpnt); -extern int mptscsih_bios_param(struct scsi_device * sdev, struct block_device *bdev, sector_t capacity, int geom[]); +extern int mptscsih_bios_param(struct scsi_device * sdev, struct gendisk *unused, sector_t capacity, int geom[]); extern int mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r); extern int mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r); extern int mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 883d4a12a172..a377a6f6900a 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1695,7 +1695,7 @@ out: } /* End twa_reset_sequence() */ /* This funciton returns unit geometry in cylinders/heads/sectors */ -static int twa_scsi_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) +static int twa_scsi_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads, sectors, cylinders; diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 8d4174c7107e..e319be7d369c 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1404,7 +1404,7 @@ out: } /* End twl_reset_device_extension() */ /* This funciton returns unit geometry in cylinders/heads/sectors */ -static int twl_scsi_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) +static int twl_scsi_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads, sectors; diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index 89bd56f78ef9..0306a228c702 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -1340,7 +1340,7 @@ static int tw_reset_device_extension(TW_Device_Extension *tw_dev) } /* End tw_reset_device_extension() */ /* This funciton returns unit geometry in cylinders/heads/sectors */ -static int tw_scsi_biosparam(struct scsi_device *sdev, struct block_device *bdev, +static int tw_scsi_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads, sectors, cylinders; diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 743be2ef6d1a..8b17d8103e90 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -3240,7 +3240,7 @@ static int blogic_resetadapter(struct blogic_adapter *adapter, bool hard_reset) the BIOS, and a warning may be displayed. */ -static int blogic_diskparam(struct scsi_device *sdev, struct block_device *dev, +static int blogic_diskparam(struct scsi_device *sdev, struct gendisk *disk, sector_t capacity, int *params) { struct blogic_adapter *adapter = @@ -3261,7 +3261,7 @@ static int blogic_diskparam(struct scsi_device *sdev, struct block_device *dev, diskparam->sectors = 32; } diskparam->cylinders = (unsigned long) capacity / (diskparam->heads * diskparam->sectors); - buf = scsi_bios_ptable(dev->bd_disk); + buf = scsi_bios_ptable(disk); if (buf == NULL) return 0; /* diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h index 61bf26d4fc10..79de815e33b0 100644 --- a/drivers/scsi/BusLogic.h +++ b/drivers/scsi/BusLogic.h @@ -1273,7 +1273,7 @@ static inline void blogic_incszbucket(unsigned int *cmdsz_buckets, static const char *blogic_drvr_info(struct Scsi_Host *); static int blogic_qcmd(struct Scsi_Host *h, struct scsi_cmnd *); -static int blogic_diskparam(struct scsi_device *, struct block_device *, sector_t, int *); +static int blogic_diskparam(struct scsi_device *, struct gendisk *, sector_t, int *); static int blogic_sdev_configure(struct scsi_device *, struct queue_limits *lim); static void blogic_qcompleted_ccb(struct blogic_ccb *); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 2264a97d91a0..ea66196ef7c7 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -273,7 +273,7 @@ struct aac_driver_ident* aac_get_driver_ident(int devtype) /** * aac_biosparm - return BIOS parameters for disk * @sdev: The scsi device corresponding to the disk - * @bdev: the block device corresponding to the disk + * @disk: the gendisk corresponding to the disk * @capacity: the sector capacity of the disk * @geom: geometry block to fill in * @@ -292,7 +292,7 @@ struct aac_driver_ident* aac_get_driver_ident(int devtype) * be displayed. */ -static int aac_biosparm(struct scsi_device *sdev, struct block_device *bdev, +static int aac_biosparm(struct scsi_device *sdev, struct gendisk *disk, sector_t capacity, int *geom) { struct diskparm *param = (struct diskparm *)geom; @@ -324,7 +324,7 @@ static int aac_biosparm(struct scsi_device *sdev, struct block_device *bdev, * entry whose end_head matches one of the standard geometry * translations ( 64/32, 128/32, 255/63 ). */ - buf = scsi_bios_ptable(bdev->bd_disk); + buf = scsi_bios_ptable(disk); if (!buf) return 0; if (*(__le16 *)(buf + 0x40) == cpu_to_le16(MSDOS_LABEL_MAGIC)) { diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index 3a2c336307c0..063e1b5818d3 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -7096,7 +7096,7 @@ static int advansys_reset(struct scsi_cmnd *scp) * ip[2]: cylinders */ static int -advansys_biosparam(struct scsi_device *sdev, struct block_device *bdev, +advansys_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int ip[]) { struct asc_board *boardp = shost_priv(sdev->host); diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index e94c0a19c435..182aa80ec4c6 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -1246,7 +1246,7 @@ int aha152x_host_reset_host(struct Scsi_Host *shpnt) * Return the "logical geometry" * */ -static int aha152x_biosparam(struct scsi_device *sdev, struct block_device *bdev, +static int aha152x_biosparam(struct scsi_device *sdev, struct gendisk *disk, sector_t capacity, int *info_array) { struct Scsi_Host *shpnt = sdev->host; @@ -1261,7 +1261,7 @@ static int aha152x_biosparam(struct scsi_device *sdev, struct block_device *bdev int info[3]; /* try to figure out the geometry from the partition table */ - if (scsicam_bios_param(bdev, capacity, info) < 0 || + if (scsicam_bios_param(disk, capacity, info) < 0 || !((info[0] == 64 && info[1] == 32) || (info[0] == 255 && info[1] == 63))) { if (EXT_TRANS) { printk(KERN_NOTICE diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c index 389499d3e00a..371e8300f029 100644 --- a/drivers/scsi/aha1542.c +++ b/drivers/scsi/aha1542.c @@ -992,7 +992,7 @@ static int aha1542_host_reset(struct scsi_cmnd *cmd) } static int aha1542_biosparam(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, int geom[]) + struct gendisk *unused, sector_t capacity, int geom[]) { struct aha1542_hostdata *aha1542 = shost_priv(sdev->host); diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c index be7ebbbb9ba8..b234621f6b37 100644 --- a/drivers/scsi/aha1740.c +++ b/drivers/scsi/aha1740.c @@ -510,7 +510,7 @@ static void aha1740_getconfig(unsigned int base, unsigned int *irq_level, } static int aha1740_biosparam(struct scsi_device *sdev, - struct block_device *dev, + struct gendisk *unused, sector_t capacity, int* ip) { int size = capacity; diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 2cff19e95fec..c3d1b9dd24ae 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -720,7 +720,7 @@ ahd_linux_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) * Return the disk geometry for the given SCSI device. */ static int -ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, +ahd_linux_biosparam(struct scsi_device *sdev, struct gendisk *disk, sector_t capacity, int geom[]) { int heads; @@ -731,7 +731,7 @@ ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, ahd = *((struct ahd_softc **)sdev->host->hostdata); - if (scsi_partsize(bdev->bd_disk, capacity, geom)) + if (scsi_partsize(disk, capacity, geom)) return 0; heads = 64; diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index 05bdb73d1157..8b2b98666d61 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -683,7 +683,7 @@ ahc_linux_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) * Return the disk geometry for the given SCSI device. */ static int -ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, +ahc_linux_biosparam(struct scsi_device *sdev, struct gendisk *disk, sector_t capacity, int geom[]) { int heads; @@ -696,7 +696,7 @@ ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, ahc = *((struct ahc_softc **)sdev->host->hostdata); channel = sdev_channel(sdev); - if (scsi_partsize(bdev->bd_disk, capacity, geom)) + if (scsi_partsize(disk, capacity, geom)) return 0; heads = 64; diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 6968da9fb67c..f0c5a30ce51b 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -112,7 +112,7 @@ static int arcmsr_iop_confirm(struct AdapterControlBlock *acb); static int arcmsr_abort(struct scsi_cmnd *); static int arcmsr_bus_reset(struct scsi_cmnd *); static int arcmsr_bios_param(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, int *info); + struct gendisk *disk, sector_t capacity, int *info); static int arcmsr_queue_command(struct Scsi_Host *h, struct scsi_cmnd *cmd); static int arcmsr_probe(struct pci_dev *pdev, const struct pci_device_id *id); @@ -377,11 +377,11 @@ static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id) } static int arcmsr_bios_param(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, int *geom) + struct gendisk *disk, sector_t capacity, int *geom) { int heads, sectors, cylinders, total_capacity; - if (scsi_partsize(bdev->bd_disk, capacity, geom)) + if (scsi_partsize(disk, capacity, geom)) return 0; total_capacity = capacity; diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c index 401242912855..df6f40b51deb 100644 --- a/drivers/scsi/atp870u.c +++ b/drivers/scsi/atp870u.c @@ -1692,7 +1692,7 @@ static int atp870u_show_info(struct seq_file *m, struct Scsi_Host *HBAptr) } -static int atp870u_biosparam(struct scsi_device *disk, struct block_device *dev, +static int atp870u_biosparam(struct scsi_device *disk, struct gendisk *unused, sector_t capacity, int *ip) { int heads, sectors, cylinders; diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c index 4a3716dc644c..c0b2a980db34 100644 --- a/drivers/scsi/fdomain.c +++ b/drivers/scsi/fdomain.c @@ -469,10 +469,10 @@ static int fdomain_host_reset(struct scsi_cmnd *cmd) } static int fdomain_biosparam(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, + struct gendisk *disk, sector_t capacity, int geom[]) { - unsigned char *p = scsi_bios_ptable(bdev->bd_disk); + unsigned char *p = scsi_bios_ptable(disk); if (p && p[65] == 0xaa && p[64] == 0x55 /* Partition table valid */ && p[4]) { /* Partition type */ diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 0821cf994b98..5c602c057798 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -954,7 +954,7 @@ static DEF_SCSI_QCMD(imm_queuecommand) * be done in sd.c. Even if it gets fixed there, this will still * work. */ -static int imm_biosparam(struct scsi_device *sdev, struct block_device *dev, +static int imm_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int ip[]) { ip[0] = 0x40; diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 8648bd965287..ed34ad92c807 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -2645,7 +2645,7 @@ static int i91u_bus_reset(struct scsi_cmnd * cmnd) /** * i91u_biosparam - return the "logical geometry * @sdev: SCSI device - * @dev: Matching block device + * @unused: Matching gendisk * @capacity: Sector size of drive * @info_array: Return space for BIOS geometry * @@ -2655,7 +2655,7 @@ static int i91u_bus_reset(struct scsi_cmnd * cmnd) * FIXME: limited to 2^32 sector devices. */ -static int i91u_biosparam(struct scsi_device *sdev, struct block_device *dev, +static int i91u_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int *info_array) { struct initio_host *host; /* Point to Host adapter control block */ diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index d06b79f03538..dd6754db7e4c 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4644,10 +4644,10 @@ ATTRIBUTE_GROUPS(ipr_dev); /** * ipr_biosparam - Return the HSC mapping - * @sdev: scsi device struct - * @block_device: block device pointer + * @sdev: scsi device struct + * @unused: gendisk pointer * @capacity: capacity of the device - * @parm: Array containing returned HSC values. + * @parm: Array containing returned HSC values. * * This function generates the HSC parms that fdisk uses. * We want to make sure we return something that places partitions @@ -4657,7 +4657,7 @@ ATTRIBUTE_GROUPS(ipr_dev); * 0 on success **/ static int ipr_biosparam(struct scsi_device *sdev, - struct block_device *block_device, + struct gendisk *unused, sector_t capacity, int *parm) { int heads, sectors; diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 94adb6ac02a4..3393a288fd23 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -1123,7 +1123,7 @@ static DEF_SCSI_QCMD(ips_queue) /* Set bios geometry for the controller */ /* */ /****************************************************************************/ -static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, +static int ips_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { ips_ha_t *ha = (ips_ha_t *) sdev->host->hostdata; diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index 8ac932ec4444..30a4d4a580e9 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -398,7 +398,7 @@ /* * Scsi_Host Template */ - static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, + static int ips_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]); static int ips_sdev_configure(struct scsi_device *SDptr, struct queue_limits *lim); diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 928723c90b75..ffa5b49aaf08 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -845,7 +845,7 @@ int sas_change_queue_depth(struct scsi_device *sdev, int depth) EXPORT_SYMBOL_GPL(sas_change_queue_depth); int sas_bios_param(struct scsi_device *scsi_dev, - struct block_device *bdev, + struct gendisk *unused, sector_t capacity, int *hsc) { hsc[0] = 255; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index c7581c7829af..a00622c0c526 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -2780,7 +2780,7 @@ static inline void mega_create_proc_entry(int index, struct proc_dir_entry *pare * Return the disk geometry for a particular disk */ static int -megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev, +megaraid_biosparam(struct scsi_device *sdev, struct gendisk *disk, sector_t capacity, int geom[]) { adapter_t *adapter; @@ -2813,7 +2813,7 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev, geom[2] = cylinders; } else { - if (scsi_partsize(bdev->bd_disk, capacity, geom)) + if (scsi_partsize(disk, capacity, geom)) return 0; dev_info(&adapter->dev->dev, diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h index 013fbfb911b9..d6bfd26a8843 100644 --- a/drivers/scsi/megaraid.h +++ b/drivers/scsi/megaraid.h @@ -975,7 +975,7 @@ static void mega_free_scb(adapter_t *, scb_t *); static int megaraid_abort(struct scsi_cmnd *); static int megaraid_reset(struct scsi_cmnd *); static int megaraid_abort_and_reset(adapter_t *, struct scsi_cmnd *, int); -static int megaraid_biosparam(struct scsi_device *, struct block_device *, +static int megaraid_biosparam(struct scsi_device *, struct gendisk *, sector_t, int []); static int mega_build_sglist (adapter_t *adapter, scb_t *scb, diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 615e06fd4ee8..abbbc4b36cd1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3137,12 +3137,12 @@ static int megasas_reset_target(struct scsi_cmnd *scmd) /** * megasas_bios_param - Returns disk geometry for a disk * @sdev: device handle - * @bdev: block device + * @unused: gendisk * @capacity: drive capacity * @geom: geometry parameters */ static int -megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, +megasas_bios_param(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads; diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index e467b56949e9..3df52a3b435b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -4031,7 +4031,7 @@ out: /** * mpi3mr_bios_param - BIOS param callback * @sdev: SCSI device reference - * @bdev: Block device reference + * @unused: gendisk reference * @capacity: Capacity in logical sectors * @params: Parameter array * @@ -4040,7 +4040,7 @@ out: * Return: 0 always */ static int mpi3mr_bios_param(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, int params[]) + struct gendisk *unused, sector_t capacity, int params[]) { int heads; int sectors; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 967af259118e..7092d0debef3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2754,7 +2754,7 @@ scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) /** * scsih_bios_param - fetch head, sector, cylinder info for a disk * @sdev: scsi device struct - * @bdev: pointer to block device context + * @unused: pointer to gendisk * @capacity: device size (in 512 byte sectors) * @params: three element array to place output: * params[0] number of heads (max 255) @@ -2762,7 +2762,7 @@ scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) * params[2] number of cylinders */ static int -scsih_bios_param(struct scsi_device *sdev, struct block_device *bdev, +scsih_bios_param(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int params[]) { int heads; diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index 96549e7f5705..bdc2f2f17753 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -2142,7 +2142,7 @@ static enum scsi_timeout_action mvumi_timed_out(struct scsi_cmnd *scmd) } static int -mvumi_bios_param(struct scsi_device *sdev, struct block_device *bdev, +mvumi_bios_param(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads, sectors; diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index 486db5b2f05d..b8453c0333dc 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1745,7 +1745,7 @@ static void myrb_sdev_destroy(struct scsi_device *sdev) kfree(sdev->hostdata); } -static int myrb_biosparam(struct scsi_device *sdev, struct block_device *bdev, +static int myrb_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { struct myrb_hba *cb = shost_priv(sdev->host); diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index 278c78d066c4..a3b505240351 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -597,7 +597,7 @@ SYM53C500_host_reset(struct scsi_cmnd *SCpnt) static int SYM53C500_biosparm(struct scsi_device *disk, - struct block_device *dev, + struct gendisk *unused, sector_t capacity, int *info_array) { int size; diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index 1ed3171f1797..ea682f3044b6 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -845,7 +845,7 @@ static DEF_SCSI_QCMD(ppa_queuecommand) * be done in sd.c. Even if it gets fixed there, this will still * work. */ -static int ppa_biosparam(struct scsi_device *sdev, struct block_device *dev, +static int ppa_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int ip[]) { ip[0] = 0x40; diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 6af018f1ca22..ef841f643171 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -1023,7 +1023,7 @@ qla1280_eh_adapter_reset(struct scsi_cmnd *cmd) } static int -qla1280_biosparam(struct scsi_device *sdev, struct block_device *bdev, +qla1280_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { int heads, sectors, cylinders; diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c index 3e065d5fc80c..1ce469b7db99 100644 --- a/drivers/scsi/qlogicfas408.c +++ b/drivers/scsi/qlogicfas408.c @@ -492,7 +492,7 @@ DEF_SCSI_QCMD(qlogicfas408_queuecommand) * Return bios parameters */ -int qlogicfas408_biosparam(struct scsi_device *disk, struct block_device *dev, +int qlogicfas408_biosparam(struct scsi_device *disk, struct gendisk *unused, sector_t capacity, int ip[]) { /* This should mimic the DOS Qlogic driver's behavior exactly */ diff --git a/drivers/scsi/qlogicfas408.h b/drivers/scsi/qlogicfas408.h index a971db11d293..83ef86c71f2f 100644 --- a/drivers/scsi/qlogicfas408.h +++ b/drivers/scsi/qlogicfas408.h @@ -106,7 +106,7 @@ struct qlogicfas408_priv { irqreturn_t qlogicfas408_ihandl(int irq, void *dev_id); int qlogicfas408_queuecommand(struct Scsi_Host *h, struct scsi_cmnd * cmd); int qlogicfas408_biosparam(struct scsi_device * disk, - struct block_device *dev, + struct gendisk *unused, sector_t capacity, int ip[]); int qlogicfas408_abort(struct scsi_cmnd * cmd); extern int qlogicfas408_host_reset(struct scsi_cmnd *cmd); diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index 3ff2cf51d5b0..887de505bcf9 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -205,7 +205,7 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds /** * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors. - * @bdev: which device + * @disk: which device * @capacity: size of the disk in sectors * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders * @@ -215,13 +215,13 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds * * Returns : -1 on failure, 0 on success. */ -int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip) +int scsicam_bios_param(struct gendisk *disk, sector_t capacity, int *ip) { u64 capacity64 = capacity; /* Suppress gcc warning */ int ret = 0; /* try to infer mapping from partition table */ - if (scsi_partsize(bdev->bd_disk, capacity, ip)) + if (scsi_partsize(disk, capacity, ip)) return 0; if (capacity64 < (1ULL << 32)) { diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5b8668accf8e..3edcc43f180b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1614,9 +1614,9 @@ static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) /* override with calculated, extended default, or driver values */ if (host->hostt->bios_param) - host->hostt->bios_param(sdp, bdev, capacity, diskinfo); + host->hostt->bios_param(sdp, bdev->bd_disk, capacity, diskinfo); else - scsicam_bios_param(bdev, capacity, diskinfo); + scsicam_bios_param(bdev->bd_disk, capacity, diskinfo); geo->heads = diskinfo[0]; geo->sectors = diskinfo[1]; diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 63ed7f9aaa93..d8ad02c29320 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -1457,7 +1457,7 @@ static void stex_reset_work(struct work_struct *work) } static int stex_biosparam(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, int geom[]) + struct gendisk *unused, sector_t capacity, int geom[]) { int heads = 255, sectors = 63; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index d9e59204a9c3..dc51ea352198 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1615,7 +1615,7 @@ static int storvsc_sdev_configure(struct scsi_device *sdevice, return 0; } -static int storvsc_get_chs(struct scsi_device *sdev, struct block_device * bdev, +static int storvsc_get_chs(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int *info) { sector_t nsect = capacity; diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c index 5a380eecfc75..0c9987828774 100644 --- a/drivers/scsi/wd719x.c +++ b/drivers/scsi/wd719x.c @@ -544,7 +544,7 @@ static int wd719x_host_reset(struct scsi_cmnd *cmd) return wd719x_chip_init(wd) == 0 ? SUCCESS : FAILED; } -static int wd719x_biosparam(struct scsi_device *sdev, struct block_device *bdev, +static int wd719x_biosparam(struct scsi_device *sdev, struct gendisk *unused, sector_t capacity, int geom[]) { if (capacity >= 0x200000) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 0620dd67369f..21de0935775d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1203,7 +1203,7 @@ extern void ata_qc_complete(struct ata_queued_cmd *qc); extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, - struct block_device *bdev, + struct gendisk *unused, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); extern int ata_scsi_sdev_init(struct scsi_device *sdev); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index ba460b6c0374..9c6e90829dbd 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -685,7 +685,7 @@ extern int sas_queuecommand(struct Scsi_Host *, struct scsi_cmnd *); extern int sas_target_alloc(struct scsi_target *); int sas_sdev_configure(struct scsi_device *dev, struct queue_limits *lim); extern int sas_change_queue_depth(struct scsi_device *, int new_depth); -extern int sas_bios_param(struct scsi_device *, struct block_device *, +extern int sas_bios_param(struct scsi_device *, struct gendisk *, sector_t capacity, int *hsc); int sas_execute_internal_abort_single(struct domain_device *device, u16 tag, unsigned int qid, diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index c53812b9026f..f5a243261236 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -318,7 +318,7 @@ struct scsi_host_template { * * Status: OPTIONAL */ - int (* bios_param)(struct scsi_device *, struct block_device *, + int (* bios_param)(struct scsi_device *, struct gendisk *, sector_t, int []); /* diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h index 67f4e8835bc8..1131f51ed2c8 100644 --- a/include/scsi/scsicam.h +++ b/include/scsi/scsicam.h @@ -14,7 +14,7 @@ #ifndef SCSICAM_H #define SCSICAM_H struct gendisk; -int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip); +int scsicam_bios_param(struct gendisk *disk, sector_t capacity, int *ip); bool scsi_partsize(struct gendisk *disk, sector_t capacity, int geom[3]); unsigned char *scsi_bios_ptable(struct gendisk *disk); #endif /* def SCSICAM_H */ -- cgit v1.2.3 From 4fc8728aa34f54835b72e4db0f3db76a72948b65 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 May 2024 22:19:55 -0400 Subject: block: switch ->getgeo() to struct gendisk Instances are happier that way and it makes more sense anyway - the only part of the result that is related to partition we are given is the start sector, and that has been filled in by the caller. Everything else is a function of the disk. Only one instance (DASD) is ever looking at anything other than bdev->bd_disk and that one is trivial to adjust. Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Al Viro --- Documentation/filesystems/locking.rst | 2 +- arch/m68k/emu/nfblock.c | 4 ++-- arch/um/drivers/ubd_kern.c | 6 +++--- block/ioctl.c | 4 ++-- block/partitions/ibm.c | 2 +- drivers/block/amiflop.c | 10 +++++----- drivers/block/aoe/aoeblk.c | 4 ++-- drivers/block/floppy.c | 4 ++-- drivers/block/mtip32xx/mtip32xx.c | 6 +++--- drivers/block/rnbd/rnbd-clt.c | 4 ++-- drivers/block/sunvdc.c | 3 +-- drivers/block/swim.c | 4 ++-- drivers/block/virtio_blk.c | 6 +++--- drivers/block/xen-blkfront.c | 4 ++-- drivers/md/dm.c | 4 ++-- drivers/md/md.c | 4 ++-- drivers/memstick/core/ms_block.c | 4 ++-- drivers/memstick/core/mspro_block.c | 4 ++-- drivers/mmc/core/block.c | 4 ++-- drivers/mtd/mtd_blkdevs.c | 4 ++-- drivers/mtd/ubi/block.c | 4 ++-- drivers/nvdimm/btt.c | 4 ++-- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/nvme.h | 2 +- drivers/s390/block/dasd.c | 7 ++++--- drivers/scsi/sd.c | 8 ++++---- include/linux/blkdev.h | 2 +- 27 files changed, 59 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index aa287ccdac2f..77704fde9845 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -443,7 +443,7 @@ prototypes:: int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); void (*unlock_native_capacity) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*getgeo)(struct gendisk *, struct hd_geometry *); void (*swap_slot_free_notify) (struct block_device *, unsigned long); locking rules: diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 83410f8184ec..94a4fadc651a 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -77,9 +77,9 @@ static void nfhd_submit_bio(struct bio *bio) bio_endio(bio); } -static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int nfhd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct nfhd_device *dev = bdev->bd_disk->private_data; + struct nfhd_device *dev = disk->private_data; geo->cylinders = dev->blocks >> (6 - dev->bshift); geo->heads = 4; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 4de6613e7468..f2b2feeeb455 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -108,7 +108,7 @@ static DEFINE_MUTEX(ubd_lock); static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); -static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); +static int ubd_getgeo(struct gendisk *disk, struct hd_geometry *geo); #define MAX_DEV (16) @@ -1324,9 +1324,9 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, return res; } -static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int ubd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct ubd *ubd_dev = bdev->bd_disk->private_data; + struct ubd *ubd_dev = disk->private_data; geo->heads = 128; geo->sectors = 32; diff --git a/block/ioctl.c b/block/ioctl.c index f7b0006ca45d..a14304f737c5 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -481,7 +481,7 @@ static int blkdev_getgeo(struct block_device *bdev, */ memset(&geo, 0, sizeof(geo)); geo.start = get_start_sect(bdev); - ret = disk->fops->getgeo(bdev, &geo); + ret = disk->fops->getgeo(disk, &geo); if (ret) return ret; if (copy_to_user(argp, &geo, sizeof(geo))) @@ -515,7 +515,7 @@ static int compat_hdio_getgeo(struct block_device *bdev, * want to override it. */ geo.start = get_start_sect(bdev); - ret = disk->fops->getgeo(bdev, &geo); + ret = disk->fops->getgeo(disk, &geo); if (ret) return ret; diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index 82d9c4c3fb41..631291fbb356 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -358,7 +358,7 @@ int ibm_partition(struct parsed_partitions *state) goto out_nolab; /* set start if not filled by getgeo function e.g. virtblk */ geo->start = get_start_sect(bdev); - if (disk->fops->getgeo(bdev, geo)) + if (disk->fops->getgeo(disk, geo)) goto out_freeall; if (!fn || fn(disk, info)) { kfree(info); diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 6357d86eafdc..2932b6653b6f 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1523,13 +1523,13 @@ static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - int drive = MINOR(bdev->bd_dev) & 3; + struct amiga_floppy_struct *p = disk->private_data; - geo->heads = unit[drive].type->heads; - geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; - geo->cylinders = unit[drive].type->tracks; + geo->heads = p->type->heads; + geo->sectors = p->dtype->sects * p->type->sect_mult; + geo->cylinders = p->type->tracks; return 0; } diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 00b74a845328..34ead75e7e02 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -269,9 +269,9 @@ static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx, } static int -aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +aoeblk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct aoedev *d = bdev->bd_disk->private_data; + struct aoedev *d = disk->private_data; if ((d->flags & DEVFL_UP) == 0) { printk(KERN_ERR "aoe: disk not up\n"); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 24be0c2c4075..eb125a36ec24 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3363,9 +3363,9 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g) return 0; } -static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - int drive = (long)bdev->bd_disk->private_data; + int drive = (long)disk->private_data; int type = ITYPE(drive_state[drive].fd_device); struct floppy_struct *g; int ret; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8fc7761397bd..567192e371a8 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3148,17 +3148,17 @@ static int mtip_block_compat_ioctl(struct block_device *dev, * that each partition is also 4KB aligned. Non-aligned partitions adversely * affects performance. * - * @dev Pointer to the block_device strucutre. + * @disk Pointer to the gendisk strucutre. * @geo Pointer to a hd_geometry structure. * * return value * 0 Operation completed successfully. * -ENOTTY An error occurred while reading the drive capacity. */ -static int mtip_block_getgeo(struct block_device *dev, +static int mtip_block_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct driver_data *dd = dev->bd_disk->private_data; + struct driver_data *dd = disk->private_data; sector_t capacity; if (!dd) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 15627417f12e..119b7e27023f 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -942,11 +942,11 @@ static void rnbd_client_release(struct gendisk *gen) rnbd_clt_put_dev(dev); } -static int rnbd_client_getgeo(struct block_device *block_device, +static int rnbd_client_getgeo(struct gendisk *disk, struct hd_geometry *geo) { u64 size; - struct rnbd_clt_dev *dev = block_device->bd_disk->private_data; + struct rnbd_clt_dev *dev = disk->private_data; struct queue_limits *limit = &dev->queue->limits; size = dev->size * (limit->logical_block_size / SECTOR_SIZE); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 7af21fe67671..107751721178 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -119,9 +119,8 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr) return vio_dring_avail(dr, VDC_TX_RING_SIZE); } -static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int vdc_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct gendisk *disk = bdev->bd_disk; sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index eda33c5eb5e2..416015947ae6 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -711,9 +711,9 @@ static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode, return -ENOTTY; } -static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int floppy_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct floppy_state *fs = bdev->bd_disk->private_data; + struct floppy_state *fs = disk->private_data; struct floppy_struct *g; int ret; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e649fa67bac1..85efa7e535f8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -829,9 +829,9 @@ out: } /* We provide getgeo only to please some old bootloader/partitioning tools */ -static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) +static int virtblk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct virtio_blk *vblk = bd->bd_disk->private_data; + struct virtio_blk *vblk = disk->private_data; int ret = 0; mutex_lock(&vblk->vdev_mutex); @@ -853,7 +853,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) /* some standard values, similar to sd */ geo->heads = 1 << 6; geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; + geo->cylinders = get_capacity(disk) >> 11; } out: mutex_unlock(&vblk->vdev_mutex); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5babe575c288..04fc6b552c04 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -493,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg) schedule_work(&rinfo->work); } -static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) +static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg) { /* We don't have real geometry info, but let's at least return values consistent with the size of the device */ - sector_t nsect = get_capacity(bd->bd_disk); + sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; hg->heads = 0xff; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a44e8c2dccee..7bd6fa05b00a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -403,9 +403,9 @@ static void do_deferred_remove(struct work_struct *w) dm_deferred_remove(); } -static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int dm_blk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct mapped_device *md = bdev->bd_disk->private_data; + struct mapped_device *md = disk->private_data; return dm_get_geometry(md, geo); } diff --git a/drivers/md/md.c b/drivers/md/md.c index ac85ec73a409..236bb04f3e54 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7678,9 +7678,9 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev) * 4 sectors (with a BIG number of cylinders...). This drives * dosfs just mad... ;-) */ -static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int md_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct mddev *mddev = bdev->bd_disk->private_data; + struct mddev *mddev = disk->private_data; geo->heads = 2; geo->sectors = 4; diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index d34892782f6e..1af157ce0a63 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1953,10 +1953,10 @@ static void msb_data_clear(struct msb_data *msb) msb->card = NULL; } -static int msb_bd_getgeo(struct block_device *bdev, +static int msb_bd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct msb_data *msb = bdev->bd_disk->private_data; + struct msb_data *msb = disk->private_data; *geo = msb->geometry; return 0; } diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c9853d887d28..075519caa547 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -189,10 +189,10 @@ static void mspro_block_bd_free_disk(struct gendisk *disk) kfree(msb); } -static int mspro_block_bd_getgeo(struct block_device *bdev, +static int mspro_block_bd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct mspro_block_data *msb = bdev->bd_disk->private_data; + struct mspro_block_data *msb = disk->private_data; geo->heads = msb->heads; geo->sectors = msb->sectors_per_track; diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9cc47bf94804..d1f295af9713 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -435,9 +435,9 @@ static void mmc_blk_release(struct gendisk *disk) } static int -mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +mmc_blk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16); + geo->cylinders = get_capacity(disk) / (4 * 16); geo->heads = 4; geo->sectors = 16; return 0; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 847c11542f02..28e09d080440 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -246,9 +246,9 @@ unlock: blktrans_dev_put(dev); } -static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int blktrans_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data; + struct mtd_blktrans_dev *dev = disk->private_data; int ret = -ENXIO; mutex_lock(&dev->lock); diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 39cc0a6a4d37..b53fd147fa65 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -282,12 +282,12 @@ static void ubiblock_release(struct gendisk *gd) mutex_unlock(&dev->dev_mutex); } -static int ubiblock_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int ubiblock_getgeo(struct gendisk *disk, struct hd_geometry *geo) { /* Some tools might require this information */ geo->heads = 1; geo->cylinders = 1; - geo->sectors = get_capacity(bdev->bd_disk); + geo->sectors = get_capacity(disk); geo->start = 0; return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 2a1aa32e6693..a933db961ed7 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1478,12 +1478,12 @@ static void btt_submit_bio(struct bio *bio) bio_endio(bio); } -static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) +static int btt_getgeo(struct gendisk *disk, struct hd_geometry *geo) { /* some standard values */ geo->heads = 1 << 6; geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; + geo->cylinders = get_capacity(disk) >> 11; return 0; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 812c1565114f..f96f74bff6ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1803,12 +1803,12 @@ static void nvme_release(struct gendisk *disk) nvme_ns_release(disk->private_data); } -int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) +int nvme_getgeo(struct gendisk *disk, struct hd_geometry *geo) { /* some standard values */ geo->heads = 1 << 6; geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bdev->bd_disk) >> 11; + geo->cylinders = get_capacity(disk) >> 11; return 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cfd2b5b90b91..102fae6a231c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -936,7 +936,7 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns **id); -int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); +int nvme_getgeo(struct gendisk *disk, struct hd_geometry *geo); int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_attr_groups[]; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 506a947d00a5..582ac7e61b24 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3317,11 +3317,11 @@ static void dasd_release(struct gendisk *disk) /* * Return disk geometry. */ -static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int dasd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { struct dasd_device *base; - base = dasd_device_from_gendisk(bdev->bd_disk); + base = dasd_device_from_gendisk(disk); if (!base) return -ENODEV; @@ -3331,7 +3331,8 @@ static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return -EINVAL; } base->discipline->fill_geometry(base->block, geo); - geo->start = get_start_sect(bdev) >> base->block->s2b_shift; + // geo->start is left unchanged by the above + geo->start >>= base->block->s2b_shift; dasd_put_device(base); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3edcc43f180b..00ad574ce61c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1599,9 +1599,9 @@ static void sd_release(struct gendisk *disk) scsi_device_put(sdev); } -static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int sd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; struct Scsi_Host *host = sdp->host; sector_t capacity = logical_to_sectors(sdp, sdkp->capacity); @@ -1614,9 +1614,9 @@ static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) /* override with calculated, extended default, or driver values */ if (host->hostt->bios_param) - host->hostt->bios_param(sdp, bdev->bd_disk, capacity, diskinfo); + host->hostt->bios_param(sdp, disk, capacity, diskinfo); else - scsicam_bios_param(bdev->bd_disk, capacity, diskinfo); + scsicam_bios_param(disk, capacity, diskinfo); geo->heads = diskinfo[0]; geo->sectors = diskinfo[1]; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fbc45121cd4f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1659,7 +1659,7 @@ struct block_device_operations { unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); void (*unlock_native_capacity) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*getgeo)(struct gendisk *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); void (*free_disk)(struct gendisk *disk); /* this callback is with swap_lock and sometimes page table lock held */ -- cgit v1.2.3 From 4c70fb2624ab1588faa58dcd407d4c61d64b288d Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Wed, 13 Aug 2025 08:29:01 +0000 Subject: cpuset: remove redundant CS_ONLINE flag The CS_ONLINE flag was introduced prior to the CSS_ONLINE flag in the cpuset subsystem. Currently, the flag setting sequence is as follows: 1. cpuset_css_online() sets CS_ONLINE 2. css->flags gets CSS_ONLINE set ... 3. cgroup->kill_css sets CSS_DYING 4. cpuset_css_offline() clears CS_ONLINE 5. css->flags clears CSS_ONLINE The is_cpuset_online() check currently occurs between steps 1 and 3. However, it would be equally safe to perform this check between steps 2 and 3, as CSS_ONLINE provides the same synchronization guarantee as CS_ONLINE. Since CS_ONLINE is redundant with CSS_ONLINE and provides no additional synchronization benefits, we can safely remove it to simplify the code. Signed-off-by: Chen Ridong Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 +++++ kernel/cgroup/cpuset-internal.h | 3 +-- kernel/cgroup/cpuset.c | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..ae73dbb19165 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -354,6 +354,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } +static inline bool css_is_online(struct cgroup_subsys_state *css) +{ + return css->flags & CSS_ONLINE; +} + static inline bool css_is_self(struct cgroup_subsys_state *css) { if (css == &css->cgroup->self) { diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h index 383963e28ac6..75b3aef39231 100644 --- a/kernel/cgroup/cpuset-internal.h +++ b/kernel/cgroup/cpuset-internal.h @@ -38,7 +38,6 @@ enum prs_errcode { /* bits in struct cpuset flags field */ typedef enum { - CS_ONLINE, CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, CS_MEM_HARDWALL, @@ -202,7 +201,7 @@ static inline struct cpuset *parent_cs(struct cpuset *cs) /* convenient tests for these bits */ static inline bool is_cpuset_online(struct cpuset *cs) { - return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); + return css_is_online(&cs->css) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 27adb04df675..3466ebbf1016 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -207,7 +207,7 @@ static inline void notify_partition_change(struct cpuset *cs, int old_prs) * parallel, we may leave an offline CPU in cpu_allowed or some other masks. */ static struct cpuset top_cpuset = { - .flags = BIT(CS_ONLINE) | BIT(CS_CPU_EXCLUSIVE) | + .flags = BIT(CS_CPU_EXCLUSIVE) | BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE), .partition_root_state = PRS_ROOT, .relax_domain_level = -1, @@ -3496,7 +3496,6 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpus_read_lock(); mutex_lock(&cpuset_mutex); - set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) @@ -3571,7 +3570,6 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); cpuset_dec(); - clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); cpus_read_unlock(); -- cgit v1.2.3 From 2caa6b88e0ba0231fb4ff0ba8e73cedd5fb81fc8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 11 Aug 2025 14:08:04 +0200 Subject: bpf: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. Signed-off-by: Thomas Weißschuh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250811-restricted-pointers-bpf-v1-1-a1d7cc3cb9e7@linutronix.de --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e7fd3ee759e..52fecb7a1fe3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1296,7 +1296,7 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { - pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen, + pr_err("flen=%u proglen=%u pass=%u image=%p from=%s pid=%d\n", flen, proglen, pass, image, current->comm, task_pid_nr(current)); if (image) -- cgit v1.2.3 From 139235103f6039c2c77cb8f51cb2e7e610fe0114 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 11 Aug 2025 15:35:05 +0800 Subject: net: stmmac: Change first parameter of fix_soc_reset() In order to use netdev_err() to print message in the callback function of fix_soc_reset(), change fix_soc_reset() to have "struct stmmac_priv *" as its first parameter. This is preparation for later patch, no functionality change. Suggested-by: Andrew Lunn Signed-off-by: Tiezhu Yang Link: https://patch.msgid.link/20250811073506.27513-3-yangtiezhu@loongson.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 6 +++--- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +- drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 +- include/linux/stmmac.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 889e2bb6f7f5..c2d9e89f0063 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -49,7 +49,7 @@ struct imx_dwmac_ops { u32 flags; bool mac_rgmii_txclk_auto_adj; - int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); + int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; @@ -265,9 +265,9 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) writel(old_ctrl, dwmac->base_addr + MAC_CTRL_REG); } -static int imx_dwmac_mx93_reset(void *priv, void __iomem *ioaddr) +static int imx_dwmac_mx93_reset(struct stmmac_priv *priv, void __iomem *ioaddr) { - struct plat_stmmacenet_data *plat_dat = priv; + struct plat_stmmacenet_data *plat_dat = priv->plat; u32 value = readl(ioaddr + DMA_BUS_MODE); /* DMA SW reset */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 5769165ee5ba..4c477d6ce1e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -509,7 +509,7 @@ static int loongson_dwmac_acpi_config(struct pci_dev *pdev, } /* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */ -static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) +static int loongson_dwmac_fix_reset(struct stmmac_priv *priv, void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_BUS_MODE); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 99635b37044a..3f7c765dcb79 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -100,7 +100,7 @@ int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr) return -EINVAL; if (plat && plat->fix_soc_reset) - return plat->fix_soc_reset(plat, ioaddr); + return plat->fix_soc_reset(priv, ioaddr); return stmmac_do_callback(priv, dma, reset, ioaddr); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 22c24dacbc65..e284f04964bf 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -238,7 +238,7 @@ struct plat_stmmacenet_data { int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); - int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); + int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); int (*mac_finish)(struct net_device *ndev, -- cgit v1.2.3 From 96326447d466ca62b713f660cfc73ef7879151a0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 12 Aug 2025 06:57:23 +0200 Subject: net: mediatek: wed: Introduce MT7992 WED support to MT7988 SoC Introduce the second WDMA RX ring in WED driver for MT7988 SoC since the Mediatek MT7992 WiFi chipset supports two separated WDMA rings. Add missing MT7988 configurations to properly support WED for MT7992 in MT76 driver. Co-developed-by: Rex Lu Signed-off-by: Rex Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250812-mt7992-wed-support-v3-1-9ada78a819a4@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_wed.c | 33 +++++++++++++++++++----- drivers/net/ethernet/mediatek/mtk_wed.h | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/mmio.c | 6 ++--- drivers/net/wireless/mediatek/mt76/mt7996/mmio.c | 12 ++++----- include/linux/soc/mediatek/mtk_wed.h | 2 +- 5 files changed, 38 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 0a80d8f8cff7..3dbb113b792c 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -59,7 +59,9 @@ struct mtk_wed_flow_block_priv { static const struct mtk_wed_soc_data mt7622_data = { .regmap = { .tx_bm_tkid = 0x088, - .wpdma_rx_ring0 = 0x770, + .wpdma_rx_ring = { + 0x770, + }, .reset_idx_tx_mask = GENMASK(3, 0), .reset_idx_rx_mask = GENMASK(17, 16), }, @@ -70,7 +72,9 @@ static const struct mtk_wed_soc_data mt7622_data = { static const struct mtk_wed_soc_data mt7986_data = { .regmap = { .tx_bm_tkid = 0x0c8, - .wpdma_rx_ring0 = 0x770, + .wpdma_rx_ring = { + 0x770, + }, .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, @@ -81,7 +85,10 @@ static const struct mtk_wed_soc_data mt7986_data = { static const struct mtk_wed_soc_data mt7988_data = { .regmap = { .tx_bm_tkid = 0x0c8, - .wpdma_rx_ring0 = 0x7d0, + .wpdma_rx_ring = { + 0x7d0, + 0x7d8, + }, .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, @@ -621,8 +628,8 @@ mtk_wed_amsdu_init(struct mtk_wed_device *dev) return ret; } - /* eagle E1 PCIE1 tx ring 22 flow control issue */ - if (dev->wlan.id == 0x7991) + /* Kite and Eagle E1 PCIE1 tx ring 22 flow control issue */ + if (dev->wlan.id == 0x7991 || dev->wlan.id == 0x7992) wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING); wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); @@ -1239,7 +1246,11 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev) return; wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo); - wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx); + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[0], + dev->wlan.wpdma_rx[0]); + if (mtk_wed_is_v3_or_greater(dev->hw)) + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[1], + dev->wlan.wpdma_rx[1]); if (!dev->wlan.hw_rro) return; @@ -2323,6 +2334,16 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) if (!dev->rx_wdma[i].desc) mtk_wed_wdma_rx_ring_setup(dev, i, 16, false); + if (dev->wlan.hw_rro) { + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) { + u32 addr = MTK_WED_RRO_MSDU_PG_CTRL0(i) + + MTK_WED_RING_OFS_COUNT; + + if (!wed_r32(dev, addr)) + wed_w32(dev, addr, 1); + } + } + mtk_wed_hw_init(dev); mtk_wed_configure_irq(dev, irq_mask); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index c1f0479d7a71..b49aee9a8b65 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -17,7 +17,7 @@ struct mtk_wed_wo; struct mtk_wed_soc_data { struct { u32 tx_bm_tkid; - u32 wpdma_rx_ring0; + u32 wpdma_rx_ring[MTK_WED_RX_QUEUES]; u32 reset_idx_tx_mask; u32 reset_idx_rx_mask; } regmap; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 4a82f8e4c118..36488aa6cc20 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -664,8 +664,8 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, MT_RXQ_WED_RING_BASE; wed->wlan.wpdma_rx_glo = pci_resource_start(pci_dev, 0) + MT_WPDMA_GLO_CFG; - wed->wlan.wpdma_rx = pci_resource_start(pci_dev, 0) + - MT_RXQ_WED_DATA_RING_BASE; + wed->wlan.wpdma_rx[0] = pci_resource_start(pci_dev, 0) + + MT_RXQ_WED_DATA_RING_BASE; } else { struct platform_device *plat_dev = pdev_ptr; struct resource *res; @@ -687,7 +687,7 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, wed->wlan.wpdma_tx = res->start + MT_TXQ_WED_RING_BASE; wed->wlan.wpdma_txfree = res->start + MT_RXQ_WED_RING_BASE; wed->wlan.wpdma_rx_glo = res->start + MT_WPDMA_GLO_CFG; - wed->wlan.wpdma_rx = res->start + MT_RXQ_WED_DATA_RING_BASE; + wed->wlan.wpdma_rx[0] = res->start + MT_RXQ_WED_DATA_RING_BASE; } wed->wlan.nbuf = MT7915_HW_TOKEN_SIZE; wed->wlan.tx_tbit[0] = is_mt7915(&dev->mt76) ? 4 : 30; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c index 30b40f4a91be..fb2428a9b877 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c @@ -503,9 +503,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr, } wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + hif1_ofs + MT_WFDMA0_GLO_CFG; - wed->wlan.wpdma_rx = wed->wlan.phy_base + hif1_ofs + - MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + - MT7996_RXQ_BAND0 * MT_RING_SIZE; + wed->wlan.wpdma_rx[0] = wed->wlan.phy_base + hif1_ofs + + MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + + MT7996_RXQ_BAND0 * MT_RING_SIZE; wed->wlan.id = MT7996_DEVICE_ID_2; wed->wlan.tx_tbit[0] = ffs(MT_INT_TX_DONE_BAND2) - 1; @@ -518,9 +518,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr, wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + MT_WFDMA0_GLO_CFG; - wed->wlan.wpdma_rx = wed->wlan.phy_base + - MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + - MT7996_RXQ_BAND0 * MT_RING_SIZE; + wed->wlan.wpdma_rx[0] = wed->wlan.phy_base + + MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) + + MT7996_RXQ_BAND0 * MT_RING_SIZE; wed->wlan.wpdma_rx_rro[0] = wed->wlan.phy_base + MT_RXQ_RING_BASE(MT7996_RXQ_RRO_BAND0) + diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index d8949a4ed0dc..c4ff6bab176d 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -147,7 +147,7 @@ struct mtk_wed_device { u32 wpdma_tx; u32 wpdma_txfree; u32 wpdma_rx_glo; - u32 wpdma_rx; + u32 wpdma_rx[MTK_WED_RX_QUEUES]; u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; u32 wpdma_rx_pg; -- cgit v1.2.3 From fb2f2a86f0cd9690357b9bb67af00d386a7e819f Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 16 Jun 2025 13:03:21 +0200 Subject: ice: cleanup capabilities evaluation When evaluating the capabilities field, the ICE_AQC_BIT_ROCEV2_LAG and ICE_AQC_BIT_SRIOV_LAG defines were both not using the BIT operator, instead simply setting a hex value that set the correct bits. While not inaccurate, this method is misleading, and when it is expanded in the following implementation it becomes even more confusing. Switch to using the BIT() operator to clarify what is being checked. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Reviewed-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 012b5d499c1a..dbe93f940ef0 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -192,8 +192,8 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 #define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087 #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 -#define LIBIE_AQC_BIT_ROCEV2_LAG 0x01 -#define LIBIE_AQC_BIT_SRIOV_LAG 0x02 +#define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) +#define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 -- cgit v1.2.3 From 8d90041a0d285044b89629f539ca0685e156848b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 14 Aug 2025 11:22:13 -0400 Subject: dlm: handle release_option as unsigned Future patches will introduce a invalid argument check for undefined values. All values for release_option are positive integer values to not check on negative values as well we just change the parameter to unsigned int. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 8 ++++---- include/linux/dlm.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index d986b7ef153d..8eadbf0fdf17 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -676,7 +676,7 @@ int dlm_new_user_lockspace(const char *name, const char *cluster, This is because there may be LKBs queued as ASTs that have been unlinked from their RSBs and are pending deletion once the AST has been delivered */ -static int lockspace_busy(struct dlm_ls *ls, int release_option) +static int lockspace_busy(struct dlm_ls *ls, unsigned int release_option) { struct dlm_lkb *lkb; unsigned long id; @@ -704,7 +704,7 @@ static int lockspace_busy(struct dlm_ls *ls, int release_option) return rv; } -static int release_lockspace(struct dlm_ls *ls, int release_option) +static int release_lockspace(struct dlm_ls *ls, unsigned int release_option) { int busy, rv; @@ -792,7 +792,7 @@ static int release_lockspace(struct dlm_ls *ls, int release_option) * See DLM_RELEASE defines for release_option values and their meaning. */ -int dlm_release_lockspace(void *lockspace, int force) +int dlm_release_lockspace(void *lockspace, unsigned int release_option) { struct dlm_ls *ls; int error; @@ -803,7 +803,7 @@ int dlm_release_lockspace(void *lockspace, int force) dlm_put_lockspace(ls); mutex_lock(&ls_lock); - error = release_lockspace(ls, force); + error = release_lockspace(ls, release_option); if (!error) ls_count--; if (!ls_count) diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 108eb953eb18..34015a008b80 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -122,7 +122,8 @@ int dlm_new_lockspace(const char *name, const char *cluster, * release_option: see DLM_RELEASE values above. */ -int dlm_release_lockspace(dlm_lockspace_t *lockspace, int release_option); +int dlm_release_lockspace(dlm_lockspace_t *lockspace, + unsigned int release_option); /* * dlm_lock -- cgit v1.2.3 From 8e40210788636619404871df07445fa4590138b4 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 14 Aug 2025 11:22:14 -0400 Subject: dlm: check for undefined release_option values Checking on all undefined release_option values to return -EINVAL in case a user is providing them to dlm_release_lockspace(). Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 3 +++ include/linux/dlm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 8eadbf0fdf17..ddaa76558706 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -797,6 +797,9 @@ int dlm_release_lockspace(void *lockspace, unsigned int release_option) struct dlm_ls *ls; int error; + if (release_option > __DLM_RELEASE_MAX) + return -EINVAL; + ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 34015a008b80..7e7b45b0d097 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -113,6 +113,7 @@ int dlm_new_lockspace(const char *name, const char *cluster, #define DLM_RELEASE_NORMAL 2 #define DLM_RELEASE_NO_EVENT 3 #define DLM_RELEASE_RECOVER 4 +#define __DLM_RELEASE_MAX DLM_RELEASE_RECOVER /* * dlm_release_lockspace -- cgit v1.2.3 From dab32f2576a39d5f54f3dbbbc718d92fa5109ce9 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 7 Aug 2025 15:55:39 +0200 Subject: s390/pci: Use pci_uevent_ers() in PCI recovery Issue uevents on s390 during PCI recovery using pci_uevent_ers() as done by EEH and AER PCIe recovery routines. Signed-off-by: Niklas Schnelle Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20250807-add_err_uevents-v5-2-adf85b0620b0@linux.ibm.com --- arch/s390/pci/pci_event.c | 3 +++ drivers/pci/pci-driver.c | 2 +- include/linux/pci.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d930416d4c90..b95376041501 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -88,6 +88,7 @@ static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); + pci_uevent_ers(pdev, ers_res); if (ers_result_indicates_abort(ers_res)) pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); else if (ers_res == PCI_ERS_RESULT_NEED_RESET) @@ -244,6 +245,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) ers_res = PCI_ERS_RESULT_RECOVERED; if (ers_res != PCI_ERS_RESULT_RECOVERED) { + pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT); pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); status_str = "failed (driver can't recover)"; @@ -253,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); if (driver->err_handler->resume) driver->err_handler->resume(pdev); + pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED); out_unlock: pci_dev_unlock(pdev); zpci_report_status(zdev, "recovery", status_str); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 6405acdb5d0f..302d61783f6c 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1582,7 +1582,7 @@ static int pci_uevent(const struct device *dev, struct kobj_uevent_env *env) return 0; } -#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) || defined(CONFIG_S390) /** * pci_uevent_ers - emit a uevent during recovery path of PCI device * @pdev: PCI device undergoing error recovery diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..7735acf6f349 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2764,7 +2764,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) || defined(CONFIG_S390) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -- cgit v1.2.3 From f39494089aaa1022008eee245fb83ef1ae911b6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Jul 2025 21:09:13 -0700 Subject: srcu: Move rcu_is_watching() checks to srcu_read_{,un}lock_fast() The rcu_is_watching() warnings are currently in the SRCU-tree implementations of __srcu_read_lock_fast() and __srcu_read_unlock_fast(). However, this makes it difficult to create _notrace variants of srcu_read_lock_fast() and srcu_read_unlock_fast(). This commit therefore moves these checks to srcu_read_lock_fast(), srcu_read_unlock_fast(), srcu_down_read_fast(), and srcu_up_read_fast(). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 4 ++++ include/linux/srcutree.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f179700fecaf..478c73d067f7 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -275,6 +275,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * { struct srcu_ctr __percpu *retval; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); retval = __srcu_read_lock_fast(ssp); rcu_try_lock_acquire(&ssp->dep_map); @@ -295,6 +296,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); return __srcu_read_lock_fast(ssp); } @@ -389,6 +391,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } /** @@ -405,6 +408,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_up_read_fast()."); } /** diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index bf44d8d1e69e..043b5a67ef71 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -244,7 +244,6 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) this_cpu_inc(scp->srcu_locks.counter); /* Y */ else @@ -275,7 +274,6 @@ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ else atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 7e2a2d060da4860af37e1000dc62a30a1551d9e8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:12:16 -0700 Subject: srcu: Add srcu_read_lock_fast_notrace() and srcu_read_unlock_fast_notrace() This commit adds no-trace variants of the srcu_read_lock_fast() and srcu_read_unlock_fast() functions for tracing use. [ paulmck: Apply notrace feedback from Joel Fernandes, Steven Rostedt, and Mathieu Desnoyers. ] [ paulmck: Apply excess-notrace feedback from Boqun Feng. ] Link: https://lore.kernel.org/all/20250721162433.10454-1-paulmck@kernel.org Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 25 +++++++++++++++++++++++++ include/linux/srcutree.h | 5 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 478c73d067f7..7a692bf8f99b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -282,6 +282,20 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * return retval; } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_lock_fast() for more information. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp) + __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + return retval; +} + /** * srcu_down_read_fast - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -394,6 +408,17 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_unlock_fast() for more information. + */ +static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, + struct srcu_ctr __percpu *scp) __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + /** * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 043b5a67ef71..4d2fee4d3828 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -240,7 +240,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); @@ -267,7 +267,8 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +static inline void notrace +__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) -- cgit v1.2.3 From cacadb630375b8c30ca4d0300812178bb884c0b0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:19:39 -0700 Subject: srcu: Add guards for notrace variants of SRCU-fast readers This adds the usual scoped_guard(srcu_fast_notrace, &my_srcu) and guard(srcu_fast_notrace)(&my_srcu). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 7a692bf8f99b..ada65b58bc4c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -515,4 +515,9 @@ DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, srcu_read_unlock_fast(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct, + _T->scp = srcu_read_lock_fast_notrace(_T->lock), + srcu_read_unlock_fast_notrace(_T->lock, _T->scp), + struct srcu_ctr __percpu *scp) + #endif -- cgit v1.2.3 From 28f073b38372b99d8d33ff5e63897d28419bda20 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 16 Jun 2025 13:03:23 +0200 Subject: ice: Implement support for SRIOV VFs across Active/Active bonds This patch implements the software flows to handle SRIOV VF communication across an Active/Active link aggregate. The same restrictions apply as are in place for the support of Active/Backup bonds. - the two interfaces must be on the same NIC - the FW LLDP engine needs to be disabled - the DDP package that supports VF LAG must be loaded on device - the two interfaces must have the same QoS config - only the first interface added to the bond will have VF support - the interface with VFs must be in switchdev mode With the additional requirement of - the version of the FW on the NIC needs to have VF Active/Active support This requirement is indicated in the capabilities struct associated with the NVM loaded on the NIC. The balancing of traffic between the two interfaces is done on a queue basis. Taking the queues allocated to all of the VFs as a whole, one half of them will be distributed to each interface. When a link goes down, then the queues allocated to the down interface will migrate to the active port. When the down port comes back up, then the same queues as were originally assigned there will be moved back. Co-developed-by: Marcin Szycik Signed-off-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 4 + drivers/net/ethernet/intel/ice/ice_common.c | 15 +- drivers/net/ethernet/intel/ice/ice_common.h | 2 +- drivers/net/ethernet/intel/ice/ice_lag.c | 772 +++++++++++++++++++++--- drivers/net/ethernet/intel/ice/ice_lag.h | 21 +- drivers/net/ethernet/intel/ice/ice_type.h | 1 + include/linux/net/intel/libie/adminq.h | 1 + 8 files changed, 713 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2098f00b3cd3..a83fdd22cbe4 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -203,6 +203,7 @@ enum ice_feature { ICE_F_GCS, ICE_F_ROCE_LAG, ICE_F_SRIOV_LAG, + ICE_F_SRIOV_AA_LAG, ICE_F_MBX_LIMIT, ICE_F_MAX }; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 3bd3ea3af888..caae1780fd37 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -2060,6 +2060,10 @@ struct ice_aqc_cfg_txqs { #define ICE_AQC_Q_CFG_SRC_PRT_M 0x7 #define ICE_AQC_Q_CFG_DST_PRT_S 3 #define ICE_AQC_Q_CFG_DST_PRT_M (0x7 << ICE_AQC_Q_CFG_DST_PRT_S) +#define ICE_AQC_Q_CFG_MODE_M GENMASK(7, 6) +#define ICE_AQC_Q_CFG_MODE_SAME_PF 0x0 +#define ICE_AQC_Q_CFG_MODE_GIVE_OWN 0x1 +#define ICE_AQC_Q_CFG_MODE_KEEP_OWN 0x2 u8 time_out; #define ICE_AQC_Q_CFG_TIMEOUT_S 2 #define ICE_AQC_Q_CFG_TIMEOUT_M (0x1F << ICE_AQC_Q_CFG_TIMEOUT_S) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 209f42045fea..808870539667 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2424,6 +2424,9 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps, caps->sriov_lag = number & LIBIE_AQC_BIT_SRIOV_LAG; ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n", prefix, caps->sriov_lag); + caps->sriov_aa_lag = number & LIBIE_AQC_BIT_SRIOV_AA_LAG; + ice_debug(hw, ICE_DBG_INIT, "%s: sriov_aa_lag = %u\n", + prefix, caps->sriov_aa_lag); break; case LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE: caps->tx_sched_topo_comp_mode_en = (number == 1); @@ -4712,24 +4715,24 @@ do_aq: } /** - * ice_aq_cfg_lan_txq + * ice_aq_cfg_lan_txq - send AQ command 0x0C32 to FW * @hw: pointer to the hardware structure * @buf: buffer for command * @buf_size: size of buffer in bytes * @num_qs: number of queues being configured * @oldport: origination lport * @newport: destination lport + * @mode: cmd_type for move to use * @cd: pointer to command details structure or NULL * * Move/Configure LAN Tx queue (0x0C32) * - * There is a better AQ command to use for moving nodes, so only coding - * this one for configuring the node. + * Return: Zero on success, associated error code on failure. */ int ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, u16 buf_size, u16 num_qs, u8 oldport, u8 newport, - struct ice_sq_cd *cd) + u8 mode, struct ice_sq_cd *cd) { struct ice_aqc_cfg_txqs *cmd; struct libie_aq_desc desc; @@ -4742,10 +4745,12 @@ ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, if (!buf) return -EINVAL; - cmd->cmd_type = ICE_AQC_Q_CFG_TC_CHNG; + cmd->cmd_type = mode; cmd->num_qs = num_qs; cmd->port_num_chng = (oldport & ICE_AQC_Q_CFG_SRC_PRT_M); cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_DST_PRT_M, newport); + cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_MODE_M, + ICE_AQC_Q_CFG_MODE_KEEP_OWN); cmd->time_out = FIELD_PREP(ICE_AQC_Q_CFG_TIMEOUT_M, 5); cmd->blocked_cgds = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 60320cdf7804..dba15ad315a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -270,7 +270,7 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, int ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, u16 buf_size, u16 num_qs, u8 oldport, u8 newport, - struct ice_sq_cd *cd); + u8 mode, struct ice_sq_cd *cd); int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); struct ice_q_ctx * diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 72284555b98a..80312e1dcf7f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -14,6 +14,9 @@ static const u8 lacp_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x88, 0x09, 0, 0 }; +static const u8 act_act_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 }; #define ICE_RECIPE_LEN 64 #define ICE_LAG_SRIOV_CP_RECIPE 10 @@ -48,10 +51,10 @@ static void ice_lag_set_primary(struct ice_lag *lag) } /** - * ice_lag_set_backup - set PF LAG state to Backup + * ice_lag_set_bkup - set PF LAG state to Backup * @lag: LAG info struct */ -static void ice_lag_set_backup(struct ice_lag *lag) +static void ice_lag_set_bkup(struct ice_lag *lag) { struct ice_pf *pf = lag->pf; @@ -337,25 +340,15 @@ ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add) } /** - * ice_lag_cfg_pf_fltrs - set filters up for new active port + * ice_lag_cfg_pf_fltrs_act_bkup - set filters up for new active port * @lag: local interfaces lag struct - * @ptr: opaque data containing notifier event + * @bonding_info: netdev event bonding info */ static void -ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) +ice_lag_cfg_pf_fltrs_act_bkup(struct ice_lag *lag, + struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info *info = ptr; - struct netdev_bonding_info *bonding_info; - struct net_device *event_netdev; - struct device *dev; - - event_netdev = netdev_notifier_info_to_dev(ptr); - /* not for this netdev */ - if (event_netdev != lag->netdev) - return; - - bonding_info = &info->bonding_info; - dev = ice_pf_to_dev(lag->pf); + struct device *dev = ice_pf_to_dev(lag->pf); /* interface not active - remove old default VSI rule */ if (bonding_info->slave.state && lag->pf_rx_rule_id) { @@ -376,12 +369,13 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) } /** - * ice_lag_cfg_cp_fltr - configure filter for control packets + * ice_lag_cfg_lp_fltr - configure lport filters * @lag: local interface's lag struct * @add: add or remove rule + * @cp: control packet only or general PF lport rule */ static void -ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add) +ice_lag_cfg_lp_fltr(struct ice_lag *lag, bool add, bool cp) { struct ice_sw_rule_lkup_rx_tx *s_rule; struct ice_vsi *vsi = lag->pf->vsi[0]; @@ -395,8 +389,17 @@ ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add) } if (add) { - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); - s_rule->recipe_id = cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE); + if (cp) { + s_rule->recipe_id = + cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE); + memcpy(s_rule->hdr_data, lacp_train_pkt, + ICE_TRAIN_PKT_LEN); + } else { + s_rule->recipe_id = cpu_to_le16(lag->act_act_recipe); + memcpy(s_rule->hdr_data, act_act_train_pkt, + ICE_TRAIN_PKT_LEN); + } + s_rule->src = cpu_to_le16(vsi->port_info->lport); s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI | ICE_SINGLE_ACT_LAN_ENABLE | @@ -404,27 +407,66 @@ ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add) FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi->vsi_num)); s_rule->hdr_len = cpu_to_le16(ICE_TRAIN_PKT_LEN); - memcpy(s_rule->hdr_data, lacp_train_pkt, ICE_TRAIN_PKT_LEN); + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); opc = ice_aqc_opc_add_sw_rules; } else { opc = ice_aqc_opc_remove_sw_rules; - s_rule->index = cpu_to_le16(lag->cp_rule_idx); + if (cp) + s_rule->index = cpu_to_le16(lag->cp_rule_idx); + else + s_rule->index = cpu_to_le16(lag->act_act_rule_idx); } if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) { - netdev_warn(lag->netdev, "Error %s CP rule for fail-over\n", - add ? "ADDING" : "REMOVING"); + netdev_warn(lag->netdev, "Error %s %s rule for aggregate\n", + add ? "ADDING" : "REMOVING", + cp ? "CONTROL PACKET" : "LPORT"); goto err_cp_free; } - if (add) - lag->cp_rule_idx = le16_to_cpu(s_rule->index); - else - lag->cp_rule_idx = 0; + if (add) { + if (cp) + lag->cp_rule_idx = le16_to_cpu(s_rule->index); + else + lag->act_act_rule_idx = le16_to_cpu(s_rule->index); + } else { + if (cp) + lag->cp_rule_idx = 0; + else + lag->act_act_rule_idx = 0; + } err_cp_free: kfree(s_rule); } +/** + * ice_lag_cfg_pf_fltrs - set filters up for PF traffic + * @lag: local interfaces lag struct + * @ptr: opaque data containing notifier event + */ +static void +ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_bonding_info *info = ptr; + struct netdev_bonding_info *bonding_info; + struct net_device *event_netdev; + + event_netdev = netdev_notifier_info_to_dev(ptr); + if (event_netdev != lag->netdev) + return; + + bonding_info = &info->bonding_info; + + if (lag->bond_aa) { + if (lag->need_fltr_cfg) { + ice_lag_cfg_lp_fltr(lag, true, false); + lag->need_fltr_cfg = false; + } + } else { + ice_lag_cfg_pf_fltrs_act_bkup(lag, bonding_info); + } +} + /** * ice_display_lag_info - print LAG info * @lag: LAG info struct @@ -648,7 +690,7 @@ ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport, } if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport, - newport, NULL)) { + newport, ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG failover\n"); goto qbuf_err; } @@ -784,10 +826,17 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf) if (lag->upper_netdev) ice_lag_build_netdev_list(lag, &ndlist); - if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && - lag->bonded && lag->primary && pri_port != act_port && - !list_empty(lag->netdev_head)) - ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx); + if (lag->bonded && lag->primary && !list_empty(lag->netdev_head)) { + if (lag->bond_aa && + ice_is_feature_supported(pf, ICE_F_SRIOV_AA_LAG)) + ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL); + + if (!lag->bond_aa && + ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && + pri_port != act_port) + ice_lag_move_single_vf_nodes(lag, pri_port, act_port, + vsi->idx); + } ice_lag_destroy_netdev_list(lag, &ndlist); @@ -851,11 +900,20 @@ u8 ice_lag_prepare_vf_reset(struct ice_lag *lag) u8 pri_prt, act_prt; if (lag && lag->bonded && lag->primary && lag->upper_netdev) { - pri_prt = lag->pf->hw.port_info->lport; - act_prt = lag->active_port; - if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT) { - ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); - return act_prt; + if (!lag->bond_aa) { + pri_prt = lag->pf->hw.port_info->lport; + act_prt = lag->active_port; + if (act_prt != pri_prt && + act_prt != ICE_LAG_INVALID_PORT) { + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + return act_prt; + } + } else { + if (lag->port_bitmap & ICE_LAGS_M) { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, NULL); + lag->port_bitmap |= ICE_LAGS_M; + } } } @@ -873,10 +931,15 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt) { u8 pri_prt; - if (lag && lag->bonded && lag->primary && - act_prt != ICE_LAG_INVALID_PORT) { - pri_prt = lag->pf->hw.port_info->lport; - ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + if (lag && lag->bonded && lag->primary) { + if (!lag->bond_aa) { + pri_prt = lag->pf->hw.port_info->lport; + if (act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, + act_prt); + } else { + ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL); + } } } @@ -912,7 +975,7 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) } if (bonding_info->slave.state) - ice_lag_set_backup(lag); + ice_lag_set_bkup(lag); else ice_lag_set_primary(lag); @@ -920,6 +983,295 @@ lag_out: ice_display_lag_info(lag); } +/** + * ice_lag_aa_qbuf_recfg - fill a single queue buffer for recfg cmd + * @hw: HW struct that contains the queue context + * @qbuf: pointer to single queue buffer + * @vsi_num: index of the VF VSI in PF space + * @qnum: queue index + * + * Return: Zero on success, error code on failure. + */ +static int +ice_lag_aa_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf, + u16 vsi_num, int qnum) +{ + struct ice_pf *pf = hw->back; + struct ice_q_ctx *q_ctx; + u16 q_id; + + q_ctx = ice_get_lan_q_ctx(hw, vsi_num, 0, qnum); + if (!q_ctx) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d no Q context\n", qnum); + return -ENOENT; + } + + if (q_ctx->q_teid == ICE_INVAL_TEID) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL TEID\n", qnum); + return -EINVAL; + } + + if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL Q HANDLE\n", qnum); + return -EINVAL; + } + + q_id = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle]; + qbuf->queue_info[0].q_handle = cpu_to_le16(q_id); + qbuf->queue_info[0].tc = 0; + qbuf->queue_info[0].q_teid = cpu_to_le32(q_ctx->q_teid); + + return 0; +} + +/** + * ice_lag_aa_move_vf_qs - Move some/all VF queues to destination + * @lag: primary interface's lag struct + * @dest: index of destination port + * @vsi_num: index of VF VSI in PF space + * @all: if true move all queues to destination + * @odd: VF wide q indicator for odd/even + * @e_pf: PF struct for the event interface + * + * the parameter "all" is to control whether we are splitting the queues + * between two interfaces or moving them all to the destination interface + */ +static void ice_lag_aa_move_vf_qs(struct ice_lag *lag, u8 dest, u16 vsi_num, + bool all, bool *odd, struct ice_pf *e_pf) +{ + DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1); + struct ice_hw *old_hw, *new_hw, *pri_hw, *sec_hw; + struct device *dev = ice_pf_to_dev(lag->pf); + struct ice_vsi_ctx *pv_ctx, *sv_ctx; + struct ice_lag_netdev_list ndlist; + u16 num_q, qbuf_size, sec_vsi_num; + u8 pri_lport, sec_lport; + u32 pvf_teid, svf_teid; + u16 vf_id; + + vf_id = lag->pf->vsi[vsi_num]->vf->vf_id; + /* If sec_vf[] not defined, then no second interface to share with */ + if (lag->sec_vf[vf_id]) + sec_vsi_num = lag->sec_vf[vf_id]->idx; + else + return; + + pri_lport = lag->bond_lport_pri; + sec_lport = lag->bond_lport_sec; + + if (pri_lport == ICE_LAG_INVALID_PORT || + sec_lport == ICE_LAG_INVALID_PORT) + return; + + if (!e_pf) + ice_lag_build_netdev_list(lag, &ndlist); + + pri_hw = &lag->pf->hw; + if (e_pf && lag->pf != e_pf) + sec_hw = &e_pf->hw; + else + sec_hw = ice_lag_find_hw_by_lport(lag, sec_lport); + + if (!pri_hw || !sec_hw) + return; + + if (dest == ICE_LAGP_IDX) { + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(lag->pf); + if (!vsi) + return; + + old_hw = sec_hw; + new_hw = pri_hw; + ice_lag_config_eswitch(lag, vsi->netdev); + } else { + struct ice_pf *sec_pf = sec_hw->back; + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(sec_pf); + if (!vsi) + return; + + old_hw = pri_hw; + new_hw = sec_hw; + ice_lag_config_eswitch(lag, vsi->netdev); + } + + pv_ctx = ice_get_vsi_ctx(pri_hw, vsi_num); + if (!pv_ctx) { + dev_warn(dev, "Unable to locate primary VSI %d context for LAG failover\n", + vsi_num); + return; + } + + sv_ctx = ice_get_vsi_ctx(sec_hw, sec_vsi_num); + if (!sv_ctx) { + dev_warn(dev, "Unable to locate secondary VSI %d context for LAG failover\n", + vsi_num); + return; + } + + num_q = pv_ctx->num_lan_q_entries[0]; + qbuf_size = __struct_size(qbuf); + + /* Suspend traffic for primary VSI VF */ + pvf_teid = le32_to_cpu(pv_ctx->sched.vsi_node[0]->info.node_teid); + ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, true); + + /* Suspend traffic for secondary VSI VF */ + svf_teid = le32_to_cpu(sv_ctx->sched.vsi_node[0]->info.node_teid); + ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, true); + + for (int i = 0; i < num_q; i++) { + struct ice_sched_node *n_prt, *q_node, *parent; + struct ice_port_info *pi, *new_pi; + struct ice_vsi_ctx *src_ctx; + struct ice_sched_node *p; + struct ice_q_ctx *q_ctx; + u16 dst_vsi_num; + + pi = old_hw->port_info; + new_pi = new_hw->port_info; + + *odd = !(*odd); + if ((dest == ICE_LAGP_IDX && *odd && !all) || + (dest == ICE_LAGS_IDX && !(*odd) && !all) || + lag->q_home[vf_id][i] == dest) + continue; + + if (dest == ICE_LAGP_IDX) + dst_vsi_num = vsi_num; + else + dst_vsi_num = sec_vsi_num; + + n_prt = ice_sched_get_free_qparent(new_hw->port_info, + dst_vsi_num, 0, + ICE_SCHED_NODE_OWNER_LAN); + if (!n_prt) + continue; + + q_ctx = ice_get_lan_q_ctx(pri_hw, vsi_num, 0, i); + if (!q_ctx) + continue; + + if (dest == ICE_LAGP_IDX) + src_ctx = sv_ctx; + else + src_ctx = pv_ctx; + + q_node = ice_sched_find_node_by_teid(src_ctx->sched.vsi_node[0], + q_ctx->q_teid); + if (!q_node) + continue; + + qbuf->src_parent_teid = q_node->info.parent_teid; + qbuf->dst_parent_teid = n_prt->info.node_teid; + + /* Move the node in the HW/FW */ + if (ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i)) + continue; + + if (dest == ICE_LAGP_IDX) + ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1, + sec_lport, pri_lport, + ICE_AQC_Q_CFG_MOVE_TC_CHNG, + NULL); + else + ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1, + pri_lport, sec_lport, + ICE_AQC_Q_CFG_MOVE_TC_CHNG, + NULL); + + /* Move the node in the SW */ + parent = q_node->parent; + if (!parent) + continue; + + for (int n = 0; n < parent->num_children; n++) { + int j; + + if (parent->children[n] != q_node) + continue; + + for (j = n + 1; j < parent->num_children; + j++) { + parent->children[j - 1] = + parent->children[j]; + } + parent->children[j] = NULL; + parent->num_children--; + break; + } + + p = pi->sib_head[0][q_node->tx_sched_layer]; + while (p) { + if (p->sibling == q_node) { + p->sibling = q_node->sibling; + break; + } + p = p->sibling; + } + + if (pi->sib_head[0][q_node->tx_sched_layer] == q_node) + pi->sib_head[0][q_node->tx_sched_layer] = + q_node->sibling; + + q_node->parent = n_prt; + q_node->info.parent_teid = n_prt->info.node_teid; + q_node->sibling = NULL; + p = new_pi->sib_head[0][q_node->tx_sched_layer]; + if (p) { + while (p) { + if (!p->sibling) { + p->sibling = q_node; + break; + } + p = p->sibling; + } + } else { + new_pi->sib_head[0][q_node->tx_sched_layer] = + q_node; + } + + n_prt->children[n_prt->num_children++] = q_node; + lag->q_home[vf_id][i] = dest; + } + + ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, false); + ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, false); + + if (!e_pf) + ice_lag_destroy_netdev_list(lag, &ndlist); +} + +/** + * ice_lag_aa_failover - move VF queues in A/A mode + * @lag: primary lag struct + * @dest: index of destination port + * @e_pf: PF struct for event port + */ +void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf) +{ + bool odd = true, all = false; + int i; + + /* Primary can be a target if down (cleanup), but secondary can't */ + if (dest == ICE_LAGS_IDX && !(lag->port_bitmap & ICE_LAGS_M)) + return; + + /* Move all queues to a destination if only one port is active, + * or no ports are active and dest is primary. + */ + if ((lag->port_bitmap ^ (ICE_LAGP_M | ICE_LAGS_M)) || + (!lag->port_bitmap && dest == ICE_LAGP_IDX)) + all = true; + + ice_for_each_vsi(lag->pf, i) + if (lag->pf->vsi[i] && lag->pf->vsi[i]->type == ICE_VSI_VF) + ice_lag_aa_move_vf_qs(lag, dest, i, all, &odd, e_pf); +} + /** * ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface * @lag: primary interface lag struct @@ -982,7 +1334,7 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num, if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, src_hw->port_info->lport, hw->port_info->lport, - NULL)) { + ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG failover\n"); goto reclaim_qerr; } @@ -1053,14 +1405,15 @@ static void ice_lag_link(struct ice_lag *lag) lag->bonded = true; lag->role = ICE_LAG_UNSET; + lag->need_fltr_cfg = true; netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } /** - * ice_lag_unlink - handle unlink event + * ice_lag_act_bkup_unlink - handle unlink event for A/B bond * @lag: LAG info struct */ -static void ice_lag_unlink(struct ice_lag *lag) +static void ice_lag_act_bkup_unlink(struct ice_lag *lag) { u8 pri_port, act_port, loc_port; struct ice_pf *pf = lag->pf; @@ -1096,10 +1449,32 @@ static void ice_lag_unlink(struct ice_lag *lag) } } } +} - lag->bonded = false; - lag->role = ICE_LAG_NONE; - lag->upper_netdev = NULL; +/** + * ice_lag_aa_unlink - handle unlink event for Active-Active bond + * @lag: LAG info struct + */ +static void ice_lag_aa_unlink(struct ice_lag *lag) +{ + struct ice_lag *pri_lag; + + if (lag->primary) { + pri_lag = lag; + lag->port_bitmap &= ~ICE_LAGP_M; + } else { + pri_lag = ice_lag_find_primary(lag); + if (pri_lag) + pri_lag->port_bitmap &= ICE_LAGS_M; + } + + if (pri_lag) { + ice_lag_aa_failover(pri_lag, ICE_LAGP_IDX, lag->pf); + if (lag->primary) + pri_lag->bond_lport_pri = ICE_LAG_INVALID_PORT; + else + pri_lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + } } /** @@ -1115,10 +1490,20 @@ static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr) if (netdev != lag->netdev) return; - if (info->linking) + if (info->linking) { ice_lag_link(lag); - else - ice_lag_unlink(lag); + } else { + if (lag->bond_aa) + ice_lag_aa_unlink(lag); + else + ice_lag_act_bkup_unlink(lag); + + lag->bonded = false; + lag->role = ICE_LAG_NONE; + lag->upper_netdev = NULL; + lag->bond_aa = false; + lag->need_fltr_cfg = false; + } } /** @@ -1320,6 +1705,11 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) ice_set_feature_support(pf, ICE_F_SRIOV_LAG); else ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); + + if (caps->sriov_aa_lag && ice_pkg_has_lport_extract(&pf->hw)) + ice_set_feature_support(pf, ICE_F_SRIOV_AA_LAG); + else + ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); } /** @@ -1353,6 +1743,9 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; + lag->bond_lport_pri = lag->pf->hw.port_info->lport; + lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + lag->port_bitmap = 0; } else { u16 swid; @@ -1362,16 +1755,29 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); - ice_lag_cfg_drop_fltr(lag, true); + primary_lag->bond_lport_sec = + lag->pf->hw.port_info->lport; } /* add filter for primary control packets */ - ice_lag_cfg_cp_fltr(lag, true); + ice_lag_cfg_lp_fltr(lag, true, true); } else { if (!primary_lag && lag->primary) primary_lag = lag; + if (primary_lag) { + for (int i = 0; i < ICE_MAX_SRIOV_VFS; i++) { + if (primary_lag->sec_vf[i]) { + ice_vsi_release(primary_lag->sec_vf[i]); + primary_lag->sec_vf[i] = NULL; + } + } + } + if (!lag->primary) { ice_lag_set_swid(0, lag, false); + if (primary_lag) + primary_lag->bond_lport_sec = + ICE_LAG_INVALID_PORT; } else { if (primary_lag && lag->primary) { ice_lag_primary_swid(lag, false); @@ -1379,7 +1785,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) } } /* remove filter for control packets */ - ice_lag_cfg_cp_fltr(lag, false); + ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa); } } @@ -1405,18 +1811,34 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) if (!netif_is_same_ice(lag->pf, event_netdev)) return; + if (info->upper_dev != lag->upper_netdev) + return; + + if (info->linking) + return; + pf = lag->pf; prim_hw = &pf->hw; prim_port = prim_hw->port_info->lport; - if (info->upper_dev != lag->upper_netdev) - return; - - if (!info->linking) { - /* Since there are only two interfaces allowed in SRIOV+LAG, if - * one port is leaving, then nodes need to be on primary - * interface. - */ + /* Since there are only two interfaces allowed in SRIOV+LAG, if + * one port is leaving, then nodes need to be on primary + * interface. + */ + if (lag->bond_aa) { + struct ice_netdev_priv *e_ndp; + struct ice_pf *e_pf; + + e_ndp = netdev_priv(event_netdev); + e_pf = e_ndp->vsi->back; + + if (lag->bond_lport_pri != ICE_LAG_INVALID_PORT && + lag->port_bitmap & ICE_LAGS_M) { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, e_pf); + lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + } + } else { if (prim_port != lag->active_port && lag->active_port != ICE_LAG_INVALID_PORT) { active_hw = ice_lag_find_hw_by_lport(lag, @@ -1428,44 +1850,32 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) } /** - * ice_lag_monitor_active - main PF keep track of which port is active + * ice_lag_monitor_act_bkup - keep track of which port is active in A/B LAG * @lag: lag info struct - * @ptr: opaque data containing notifier event + * @b_info: bonding info + * @event_netdev: net_device got target netdev * * This function is for the primary PF to monitor changes in which port is * active and handle changes for SRIOV VF functionality */ -static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) +static void ice_lag_monitor_act_bkup(struct ice_lag *lag, + struct netdev_bonding_info *b_info, + struct net_device *event_netdev) { - struct netdev_notifier_bonding_info *info = ptr; - struct net_device *event_netdev, *event_upper; - struct netdev_bonding_info *bonding_info; struct ice_netdev_priv *event_np; struct ice_pf *pf, *event_pf; u8 prim_port, event_port; - if (!lag->primary) - return; - pf = lag->pf; if (!pf) return; - event_netdev = netdev_notifier_info_to_dev(ptr); - rcu_read_lock(); - event_upper = netdev_master_upper_dev_get_rcu(event_netdev); - rcu_read_unlock(); - if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev) - return; - event_np = netdev_priv(event_netdev); event_pf = event_np->vsi->back; event_port = event_pf->hw.port_info->lport; prim_port = pf->hw.port_info->lport; - bonding_info = &info->bonding_info; - - if (!bonding_info->slave.state) { + if (!b_info->slave.state) { /* if no port is currently active, then nodes and filters exist * on primary port, check if we need to move them */ @@ -1501,6 +1911,128 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) } } +/** + * ice_lag_aa_clear_spoof - adjust the placeholder VSI spoofing for A/A LAG + * @vsi: placeholder VSI to adjust + */ +static void ice_lag_aa_clear_spoof(struct ice_vsi *vsi) +{ + ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); +} + +/** + * ice_lag_monitor_act_act - Keep track of active ports in A/A LAG + * @lag: lag struct for primary interface + * @b_info: bonding_info for event + * @event_netdev: net_device for target netdev + */ +static void ice_lag_monitor_act_act(struct ice_lag *lag, + struct netdev_bonding_info *b_info, + struct net_device *event_netdev) +{ + struct ice_netdev_priv *event_np; + u8 prim_port, event_port; + struct ice_pf *event_pf; + + event_np = netdev_priv(event_netdev); + event_pf = event_np->vsi->back; + event_port = event_pf->hw.port_info->lport; + prim_port = lag->pf->hw.port_info->lport; + + if (b_info->slave.link == BOND_LINK_UP) { + /* Port is coming up */ + if (prim_port == event_port) { + /* Processing event for primary interface */ + if (lag->bond_lport_pri == ICE_LAG_INVALID_PORT) + return; + + if (!(lag->port_bitmap & ICE_LAGP_M)) { + /* Primary port was not marked up before, move + * some|all VF queues to it and mark as up + */ + lag->port_bitmap |= ICE_LAGP_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf); + } + } else { + if (lag->bond_lport_sec == ICE_LAG_INVALID_PORT) + return; + + /* Create placeholder VSIs on secondary PF. + * The placeholder is necessary so that we have + * an element that represents the VF on the secondary + * interface's scheduling tree. This will be a tree + * root for scheduling nodes when they are moved to + * the secondary interface. + */ + if (!lag->sec_vf[0]) { + struct ice_vsi_cfg_params params = {}; + struct ice_vsi *nvsi; + struct ice_vf *vf; + unsigned int bkt; + + params.type = ICE_VSI_VF; + params.port_info = event_pf->hw.port_info; + params.flags = ICE_VSI_FLAG_INIT; + + ice_for_each_vf(lag->pf, bkt, vf) { + params.vf = vf; + nvsi = ice_vsi_setup(event_pf, + ¶ms); + ice_lag_aa_clear_spoof(nvsi); + lag->sec_vf[vf->vf_id] = nvsi; + } + } + + if (!(lag->port_bitmap & ICE_LAGS_M)) { + /* Secondary port was not marked up before, + * move some|all VF queues to it and mark as up + */ + lag->port_bitmap |= ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf); + } + } + } else { + /* Port is going down */ + if (prim_port == event_port) { + lag->port_bitmap &= ~ICE_LAGP_M; + ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf); + } else { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf); + } + } +} + +/** + * ice_lag_monitor_info - Calls relevant A/A or A/B monitoring function + * @lag: lag info struct + * @ptr: opaque data containing notifier event + * + * This function is for the primary PF to monitor changes in which port is + * active and handle changes for SRIOV VF functionality + */ +static void ice_lag_monitor_info(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_bonding_info *info = ptr; + struct net_device *event_netdev, *event_upper; + struct netdev_bonding_info *bonding_info; + + if (!lag->primary) + return; + + event_netdev = netdev_notifier_info_to_dev(ptr); + bonding_info = &info->bonding_info; + rcu_read_lock(); + event_upper = netdev_master_upper_dev_get_rcu(event_netdev); + rcu_read_unlock(); + if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev) + return; + + if (lag->bond_aa) + ice_lag_monitor_act_act(lag, bonding_info, event_netdev); + else + ice_lag_monitor_act_bkup(lag, bonding_info, event_netdev); +} /** * ice_lag_chk_comp - evaluate bonded interface for feature support * @lag: lag info struct @@ -1516,6 +2048,14 @@ ice_lag_chk_comp(struct ice_lag *lag, void *ptr) struct device *dev; int count = 0; + /* All members need to know if bond A/A or A/B */ + bonding_info = &info->bonding_info; + lag->bond_mode = bonding_info->master.bond_mode; + if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) + lag->bond_aa = true; + else + lag->bond_aa = false; + if (!lag->primary) return true; @@ -1536,12 +2076,9 @@ ice_lag_chk_comp(struct ice_lag *lag, void *ptr) return false; } - bonding_info = &info->bonding_info; - lag->bond_mode = bonding_info->master.bond_mode; - if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) { - dev_info(dev, "Bond Mode not ACTIVE-BACKUP - VF LAG disabled\n"); + if (lag->bond_aa && !ice_is_feature_supported(lag->pf, + ICE_F_SRIOV_AA_LAG)) return false; - } list_for_each(tmp, lag->netdev_head) { struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg; @@ -1699,6 +2236,26 @@ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) struct ice_pf *pf = np->vsi->back; ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); + ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); +} + +/** + * ice_lag_preset_drop_fltr - preset drop filter for A/B bonds + * @lag: local lag struct + * @ptr: opaque data containing event + * + * Sets the initial drop filter for secondary interface in an + * active-backup bond + */ +static void ice_lag_preset_drop_fltr(struct ice_lag *lag, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (netdev != lag->netdev || lag->primary || !lag->need_fltr_cfg) + return; + + ice_lag_cfg_drop_fltr(lag, true); + lag->need_fltr_cfg = false; } /** @@ -1739,10 +2296,12 @@ static void ice_lag_process_event(struct work_struct *work) ice_lag_unregister(lag_work->lag, netdev); goto lag_cleanup; } - ice_lag_monitor_active(lag_work->lag, - &lag_work->info.bonding_info); ice_lag_cfg_pf_fltrs(lag_work->lag, &lag_work->info.bonding_info); + ice_lag_preset_drop_fltr(lag_work->lag, + &lag_work->info.bonding_info); + ice_lag_monitor_info(lag_work->lag, + &lag_work->info.bonding_info); } ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info); break; @@ -1993,7 +2552,8 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw, } if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport, - dest_hw->port_info->lport, NULL)) { + dest_hw->port_info->lport, + ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n"); goto sync_qerr; } @@ -2089,9 +2649,13 @@ int ice_init_lag(struct ice_pf *pf) lag->netdev = vsi->netdev; lag->role = ICE_LAG_NONE; lag->active_port = ICE_LAG_INVALID_PORT; + lag->port_bitmap = 0x0; lag->bonded = false; + lag->bond_aa = false; + lag->need_fltr_cfg = false; lag->upper_netdev = NULL; lag->notif_block.notifier_call = NULL; + memset(lag->sec_vf, 0, sizeof(lag->sec_vf)); err = ice_register_lag_handler(lag); if (err) { @@ -2109,6 +2673,11 @@ int ice_init_lag(struct ice_pf *pf) if (err) goto free_rcp_res; + err = ice_create_lag_recipe(&pf->hw, &lag->act_act_recipe, + ice_lport_rcp, 1); + if (err) + goto free_lport_res; + /* associate recipes to profiles */ for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { err = ice_aq_get_recipe_to_profile(&pf->hw, n, @@ -2118,7 +2687,8 @@ int ice_init_lag(struct ice_pf *pf) if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) { recipe_bits |= BIT(lag->pf_recipe) | - BIT(lag->lport_recipe); + BIT(lag->lport_recipe) | + BIT(lag->act_act_recipe); ice_aq_map_recipe_to_profile(&pf->hw, n, recipe_bits, NULL); } @@ -2129,9 +2699,13 @@ int ice_init_lag(struct ice_pf *pf) dev_dbg(dev, "INIT LAG complete\n"); return 0; +free_lport_res: + ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1, + &lag->lport_recipe); + free_rcp_res: ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1, - &pf->lag->pf_recipe); + &lag->pf_recipe); lag_error: kfree(lag); pf->lag = NULL; @@ -2216,11 +2790,15 @@ void ice_lag_rebuild(struct ice_pf *pf) ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw); } - ice_lag_cfg_cp_fltr(lag, true); + if (!lag->bond_aa) { + ice_lag_cfg_lp_fltr(lag, true, true); + if (lag->pf_rx_rule_id) + if (ice_lag_cfg_dflt_fltr(lag, true)) + dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n"); + } else { + ice_lag_cfg_lp_fltr(lag, true, false); + } - if (lag->pf_rx_rule_id) - if (ice_lag_cfg_dflt_fltr(lag, true)) - dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n"); ice_clear_rdma_cap(pf); lag_rebuild_out: diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 69347d9f986b..e2a0a782bdd7 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -14,7 +14,11 @@ enum ice_lag_role { ICE_LAG_UNSET }; -#define ICE_LAG_INVALID_PORT 0xFF +#define ICE_LAG_INVALID_PORT 0xFF +#define ICE_LAGP_IDX 0 +#define ICE_LAGS_IDX 1 +#define ICE_LAGP_M 0x1 +#define ICE_LAGS_M 0x2 #define ICE_LAG_RESET_RETRIES 5 #define ICE_SW_DEFAULT_PROFILE 0 @@ -41,12 +45,26 @@ struct ice_lag { u8 active_port; /* lport value for the current active port */ u8 bonded:1; /* currently bonded */ u8 primary:1; /* this is primary */ + u8 bond_aa:1; /* is this bond active-active */ + u8 need_fltr_cfg:1; /* fltrs for A/A bond still need to be make */ + u8 port_bitmap:2; /* bitmap of active ports */ + u8 bond_lport_pri; /* lport values for primary PF */ + u8 bond_lport_sec; /* lport values for secondary PF */ + + /* q_home keeps track of which interface the q is currently on */ + u8 q_home[ICE_MAX_SRIOV_VFS][ICE_MAX_RSS_QS_PER_VF]; + + /* placeholder VSI for hanging VF queues from on secondary interface */ + struct ice_vsi *sec_vf[ICE_MAX_SRIOV_VFS]; + u16 pf_recipe; u16 lport_recipe; + u16 act_act_recipe; u16 pf_rx_rule_id; u16 pf_tx_rule_id; u16 cp_rule_idx; u16 lport_rule_idx; + u16 act_act_rule_idx; u8 role; }; @@ -65,6 +83,7 @@ struct ice_lag_work { }; void ice_lag_move_new_vf_nodes(struct ice_vf *vf); +void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf); int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 2b07d5e48847..8d19efc1df72 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -296,6 +296,7 @@ struct ice_hw_common_caps { bool roce_lag; bool sriov_lag; + bool sriov_aa_lag; bool nvm_update_pending_nvm; bool nvm_update_pending_orom; diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index dbe93f940ef0..ba62f703df43 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -194,6 +194,7 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 #define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) #define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) +#define LIBIE_AQC_BIT_SRIOV_AA_LAG BIT(2) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 -- cgit v1.2.3 From 448f97fba9013ffa13f5dd82febd18836b189499 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Aug 2025 12:39:13 +0200 Subject: perf: Convert mmap() refcounts to refcount_t The recently fixed reference count leaks could have been detected by using refcount_t and refcount_t would have mitigated the potential overflow at least. Now that the code is properly structured, convert the mmap() related mmap_count variants over to refcount_t. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lorenzo Stoakes Link: https://lore.kernel.org/r/20250812104020.071507932@infradead.org --- include/linux/perf_event.h | 2 +- kernel/events/core.c | 40 ++++++++++++++++++++-------------------- kernel/events/internal.h | 4 ++-- kernel/events/ring_buffer.c | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ec9d96025683..bfbf9ea53f25 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -859,7 +859,7 @@ struct perf_event { /* mmap bits */ struct mutex mmap_mutex; - atomic_t mmap_count; + refcount_t mmap_count; struct perf_buffer *rb; struct list_head rb_entry; diff --git a/kernel/events/core.c b/kernel/events/core.c index f6211ab18503..ea357044d780 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3968,7 +3968,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, */ static inline bool event_update_userpage(struct perf_event *event) { - if (likely(!atomic_read(&event->mmap_count))) + if (likely(!refcount_read(&event->mmap_count))) return false; perf_event_update_time(event); @@ -6704,11 +6704,11 @@ static void perf_mmap_open(struct vm_area_struct *vma) struct perf_event *event = vma->vm_file->private_data; mapped_f mapped = get_mapped(event, event_mapped); - atomic_inc(&event->mmap_count); - atomic_inc(&event->rb->mmap_count); + refcount_inc(&event->mmap_count); + refcount_inc(&event->rb->mmap_count); if (vma->vm_pgoff) - atomic_inc(&event->rb->aux_mmap_count); + refcount_inc(&event->rb->aux_mmap_count); if (mapped) mapped(event, vma->vm_mm); @@ -6743,7 +6743,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { + refcount_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6763,10 +6763,10 @@ static void perf_mmap_close(struct vm_area_struct *vma) mutex_unlock(&rb->aux_mutex); } - if (atomic_dec_and_test(&rb->mmap_count)) + if (refcount_dec_and_test(&rb->mmap_count)) detach_rest = true; - if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) + if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) goto out_put; ring_buffer_attach(event, NULL); @@ -6992,19 +6992,19 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, if (data_page_nr(event->rb) != nr_pages) return -EINVAL; - if (atomic_inc_not_zero(&event->rb->mmap_count)) { + if (refcount_inc_not_zero(&event->rb->mmap_count)) { /* * Success -- managed to mmap() the same buffer * multiple times. */ perf_mmap_account(vma, user_extra, extra); - atomic_inc(&event->mmap_count); + refcount_inc(&event->mmap_count); return 0; } /* * Raced against perf_mmap_close()'s - * atomic_dec_and_mutex_lock() remove the + * refcount_dec_and_mutex_lock() remove the * event and continue as if !event->rb */ ring_buffer_attach(event, NULL); @@ -7023,7 +7023,7 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, if (!rb) return -ENOMEM; - atomic_set(&rb->mmap_count, 1); + refcount_set(&rb->mmap_count, 1); rb->mmap_user = get_current_user(); rb->mmap_locked = extra; @@ -7034,7 +7034,7 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, perf_event_update_userpage(event); perf_mmap_account(vma, user_extra, extra); - atomic_set(&event->mmap_count, 1); + refcount_set(&event->mmap_count, 1); return 0; } @@ -7081,15 +7081,15 @@ static int perf_mmap_aux(struct vm_area_struct *vma, struct perf_event *event, if (!is_power_of_2(nr_pages)) return -EINVAL; - if (!atomic_inc_not_zero(&rb->mmap_count)) + if (!refcount_inc_not_zero(&rb->mmap_count)) return -EINVAL; if (rb_has_aux(rb)) { - atomic_inc(&rb->aux_mmap_count); + refcount_inc(&rb->aux_mmap_count); } else { if (!perf_mmap_calc_limits(vma, &user_extra, &extra)) { - atomic_dec(&rb->mmap_count); + refcount_dec(&rb->mmap_count); return -EPERM; } @@ -7101,16 +7101,16 @@ static int perf_mmap_aux(struct vm_area_struct *vma, struct perf_event *event, ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, rb_flags); if (ret) { - atomic_dec(&rb->mmap_count); + refcount_dec(&rb->mmap_count); return ret; } - atomic_set(&rb->aux_mmap_count, 1); + refcount_set(&rb->aux_mmap_count, 1); rb->aux_mmap_locked = extra; } perf_mmap_account(vma, user_extra, extra); - atomic_inc(&event->mmap_count); + refcount_inc(&event->mmap_count); return 0; } @@ -13254,7 +13254,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); set: /* Can't redirect output if we've got an active mmap() */ - if (atomic_read(&event->mmap_count)) + if (refcount_read(&event->mmap_count)) goto unlock; if (output_event) { @@ -13267,7 +13267,7 @@ set: goto unlock; /* did we race against perf_mmap_close() */ - if (!atomic_read(&rb->mmap_count)) { + if (!refcount_read(&rb->mmap_count)) { ring_buffer_put(rb); goto unlock; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 249288d82b8d..d9cc57083091 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -35,7 +35,7 @@ struct perf_buffer { spinlock_t event_lock; struct list_head event_list; - atomic_t mmap_count; + refcount_t mmap_count; unsigned long mmap_locked; struct user_struct *mmap_user; @@ -47,7 +47,7 @@ struct perf_buffer { unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; - atomic_t aux_mmap_count; + refcount_t aux_mmap_count; unsigned long aux_mmap_locked; void (*free_aux)(void *); refcount_t aux_refcount; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index aa9a759e824f..20a905023736 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -400,7 +400,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * the same order, see perf_mmap_close. Otherwise we end up freeing * aux pages in this path, which is a bug, because in_atomic(). */ - if (!atomic_read(&rb->aux_mmap_count)) + if (!refcount_read(&rb->aux_mmap_count)) goto err; if (!refcount_inc_not_zero(&rb->aux_refcount)) -- cgit v1.2.3 From 2335b3f56690f76ac34b972fcaef368bab1f76f2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 16 Jun 2025 23:41:53 -0700 Subject: net/mlx5: mlx5_ifc, Add hardware definitions needed for adjacent vports Next patches will implement the discovery and creation of adjacent functions vports, this patch introduces the hardware structures definitions needed for the driver implementation. Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Parav Pandit Reviewed-by: Jack Morgenstein Signed-off-by: Alexei Lazar --- include/linux/mlx5/mlx5_ifc.h | 133 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8360d9011d4f..44d497272162 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -189,6 +189,9 @@ enum { MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729, MLX5_CMD_OP_MODIFY_XRQ = 0x72a, + MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA = 0x732, + MLX5_CMD_OPCODE_CREATE_ESW_VPORT = 0x733, + MLX5_CMD_OPCODE_DESTROY_ESW_VPORT = 0x734, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, @@ -2207,7 +2210,19 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; - u8 reserved_at_460[0x3a0]; + + u8 reserved_at_460[0x160]; + + u8 query_adjacent_functions_id[0x1]; + u8 ingress_egress_esw_vport_connect[0x1]; + u8 function_id_type_vhca_id[0x1]; + u8 reserved_at_5c3[0xd]; + u8 delegate_vhca_management_profiles[0x10]; + + u8 delegated_vhca_max[0x10]; + u8 delegate_vhca_max[0x10]; + + u8 reserved_at_600[0x200]; }; enum mlx5_ifc_flow_destination_type { @@ -5159,7 +5174,9 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6357,7 +6374,9 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6983,6 +7002,28 @@ struct mlx5_ifc_query_esw_vport_context_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_destroy_esw_vport_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vport_num[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_modify_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7484,6 +7525,85 @@ struct mlx5_ifc_query_adapter_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_function_vhca_rid_info_reg_bits { + u8 host_number[0x8]; + u8 host_pci_device_function[0x8]; + u8 host_pci_bus[0x8]; + u8 reserved_at_18[0x3]; + u8 pci_bus_assigned[0x1]; + u8 function_type[0x4]; + + u8 parent_pci_device_function[0x8]; + u8 parent_pci_bus[0x8]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x10]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_delegated_function_vhca_rid_info_bits { + struct mlx5_ifc_function_vhca_rid_info_reg_bits function_vhca_rid_info; + + u8 reserved_at_80[0x18]; + u8 manage_profile[0x8]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_query_delegated_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 functions_count[0x10]; + + u8 reserved_at_80[0x80]; + + struct mlx5_ifc_delegated_function_vhca_rid_info_bits + delegated_function_vhca_rid_info[]; +}; + +struct mlx5_ifc_query_delegated_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 vport_num[0x10]; +}; + +struct mlx5_ifc_create_esw_vport_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 managed_vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_qp_2rst_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7611,7 +7731,12 @@ struct mlx5_ifc_modify_vport_state_in_bits { u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_at_60[0x18]; + u8 reserved_at_60[0x10]; + u8 ingress_connect[0x1]; + u8 egress_connect[0x1]; + u8 ingress_connect_valid[0x1]; + u8 egress_connect_valid[0x1]; + u8 reserved_at_74[0x4]; u8 admin_state[0x4]; u8 reserved_at_7c[0x4]; }; -- cgit v1.2.3 From 40653f280b2640e5caa94eeedee43e0f1df97704 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 16 Jun 2025 17:28:20 -0700 Subject: {rdma,net}/mlx5: export mlx5_vport_get_vhca_id vhca id is already cached in the vport structure no need to query on every mlx5 layer, use the mlx5_vport_get_vhca_id, where possible. Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Parav Pandit Signed-off-by: Alexei Lazar Reviewed-by: Feng Liu Reviewed-by: Tariq Toukan --- drivers/infiniband/hw/mlx5/std_types.c | 27 ++++------------------ .../mellanox/mlx5/core/diag/reporter_vnic.c | 2 ++ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 -- .../ethernet/mellanox/mlx5/core/steering/hws/cmd.c | 16 +++++++++---- .../mellanox/mlx5/core/steering/sws/dr_cmd.c | 16 +++++++++---- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 5 +++- include/linux/mlx5/vport.h | 2 ++ 7 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c index bdb568411091..2fcf553044e1 100644 --- a/drivers/infiniband/hw/mlx5/std_types.c +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -83,33 +83,14 @@ static int fill_vport_icm_addr(struct mlx5_core_dev *mdev, u16 vport, static int fill_vport_vhca_id(struct mlx5_core_dev *mdev, u16 vport, struct mlx5_ib_uapi_query_port *info) { - size_t out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; - void *out; - int err; - - out = kzalloc(out_sz, GFP_KERNEL); - if (!out) - return -ENOMEM; + int err = mlx5_vport_get_vhca_id(mdev, vport, &info->vport_vhca_id); - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, true); - MLX5_SET(query_hca_cap_in, in, function_id, vport); - MLX5_SET(query_hca_cap_in, in, op_mod, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE | - HCA_CAP_OPMOD_GET_CUR); - - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_sz); if (err) - goto out; - - info->vport_vhca_id = MLX5_GET(query_hca_cap_out, out, - capability.cmd_hca_cap.vhca_id); + return err; info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID; -out: - kfree(out); - return err; + + return 0; } static int fill_multiport_info(struct mlx5_ib_dev *dev, u32 port_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index 86253a89c24c..32bb769f1829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */ +#include + #include "reporter_vnic.h" #include "en_stats.h" #include "devlink.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b6d53db27cd5..81857c6f6bf7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -447,8 +447,6 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) -int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); - static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 9c83753e4592..d447574d86fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -1199,22 +1199,28 @@ out: int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, u16 vport_number, u16 *gvmi) { - bool ec_vf_func = other_function ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; int out_size; void *out; int err; + if (other_function) { + err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi); + if (!err) + return 0; + + mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n", + vport_number); + return err; + } + + /* get vhca_id for `this` function */ out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); out = kzalloc(out_size, GFP_KERNEL); if (!out) return -ENOMEM; MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, other_function); - MLX5_SET(query_hca_cap_in, in, function_id, - mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); - MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c index baefb9a3fa05..bf99b933fd14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "dr_types.h" +#include "eswitch.h" int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, bool other_vport, @@ -34,21 +35,28 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, u16 vport_number, u16 *gvmi) { - bool ec_vf_func = other_vport ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; int out_size; void *out; int err; + if (other_vport) { + err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi); + if (!err) + return 0; + + mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n", + vport_number); + return err; + } + + /* get vhca_id for `this` function */ out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); out = kzalloc(out_size, GFP_KERNEL); if (!out) return -ENOMEM; MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, other_vport); - MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); - MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 231bedc6a252..2ed2e530b07d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1239,7 +1239,9 @@ int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) void *hca_caps; int err; - *vhca_id = 0; + /* try get vhca_id via eswitch */ + if (mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport, vhca_id)) + return 0; query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) @@ -1256,6 +1258,7 @@ out_free: kfree(query_ctx); return err; } +EXPORT_SYMBOL_GPL(mlx5_vport_get_vhca_id); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index c36cc6d82926..c87b9507cfa1 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -135,4 +135,6 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 33cfb80d1910b41d1a25cef89b159c945aff0f24 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:10 +0000 Subject: crypto: ccp - Add support for SNP_FEATURE_INFO command The FEATURE_INFO command provides hypervisors with a programmatic means to learn about the supported features of the currently loaded firmware. This command mimics the CPUID instruction relative to sub-leaf input and the four unsigned integer output values. To obtain information regarding the features present in the currently loaded SEV firmware, use the SNP_FEATURE_INFO command. Cache the SNP platform status and feature information from CPUID 0x8000_0024 in the sev_device structure. If SNP is enabled, utilize this cached SNP platform status for the API major, minor and build version. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ drivers/crypto/ccp/sev-dev.h | 3 ++ include/linux/psp-sev.h | 29 ++++++++++++++++++ 3 files changed, 104 insertions(+) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 528013be1c0a..a3941254d61f 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -233,6 +233,7 @@ static int sev_cmd_buffer_len(int cmd) case SEV_CMD_SNP_GUEST_REQUEST: return sizeof(struct sev_data_snp_guest_request); case SEV_CMD_SNP_CONFIG: return sizeof(struct sev_user_data_snp_config); case SEV_CMD_SNP_COMMIT: return sizeof(struct sev_data_snp_commit); + case SEV_CMD_SNP_FEATURE_INFO: return sizeof(struct sev_data_snp_feature_info); default: return 0; } @@ -1073,6 +1074,67 @@ static void snp_set_hsave_pa(void *arg) wrmsrq(MSR_VM_HSAVE_PA, 0); } +static int snp_get_platform_data(struct sev_device *sev, int *error) +{ + struct sev_data_snp_feature_info snp_feat_info; + struct snp_feature_info *feat_info; + struct sev_data_snp_addr buf; + struct page *page; + int rc; + + /* + * This function is expected to be called before SNP is + * initialized. + */ + if (sev->snp_initialized) + return -EINVAL; + + buf.address = __psp_pa(&sev->snp_plat_status); + rc = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, error); + if (rc) { + dev_err(sev->dev, "SNP PLATFORM_STATUS command failed, ret = %d, error = %#x\n", + rc, *error); + return rc; + } + + sev->api_major = sev->snp_plat_status.api_major; + sev->api_minor = sev->snp_plat_status.api_minor; + sev->build = sev->snp_plat_status.build_id; + + /* + * Do feature discovery of the currently loaded firmware, + * and cache feature information from CPUID 0x8000_0024, + * sub-function 0. + */ + if (!sev->snp_plat_status.feature_info) + return 0; + + /* + * Use dynamically allocated structure for the SNP_FEATURE_INFO + * command to ensure structure is 8-byte aligned, and does not + * cross a page boundary. + */ + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + feat_info = page_address(page); + snp_feat_info.length = sizeof(snp_feat_info); + snp_feat_info.ecx_in = 0; + snp_feat_info.feature_info_paddr = __psp_pa(feat_info); + + rc = sev_do_cmd(SEV_CMD_SNP_FEATURE_INFO, &snp_feat_info, error); + if (!rc) + sev->snp_feat_info_0 = *feat_info; + else + dev_err(sev->dev, "SNP FEATURE_INFO command failed, ret = %d, error = %#x\n", + rc, *error); + + __free_page(page); + + return rc; +} + static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) { struct sev_data_range_list *range_list = arg; @@ -1599,6 +1661,16 @@ static int sev_get_api_version(void) struct sev_user_data_status status; int error = 0, ret; + /* + * Cache SNP platform status and SNP feature information + * if SNP is available. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) { + ret = snp_get_platform_data(sev, &error); + if (ret) + return 1; + } + ret = sev_platform_status(&status, &error); if (ret) { dev_err(sev->dev, diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h index 24dd8ff8afaa..5aed2595c9ae 100644 --- a/drivers/crypto/ccp/sev-dev.h +++ b/drivers/crypto/ccp/sev-dev.h @@ -58,6 +58,9 @@ struct sev_device { bool snp_initialized; struct sev_user_data_status sev_plat_status; + + struct sev_user_data_snp_status snp_plat_status; + struct snp_feature_info snp_feat_info_0; }; int sev_dev_init(struct psp_device *psp); diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0f5f94137f6d..5fb6ae0f51cc 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -107,6 +107,7 @@ enum sev_cmd { SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, SEV_CMD_SNP_COMMIT = 0x0CB, SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_SNP_FEATURE_INFO = 0x0CE, SEV_CMD_MAX, }; @@ -814,6 +815,34 @@ struct sev_data_snp_commit { u32 len; } __packed; +/** + * struct sev_data_snp_feature_info - SEV_SNP_FEATURE_INFO structure + * + * @length: len of the command buffer read by the PSP + * @ecx_in: subfunction index + * @feature_info_paddr : System Physical Address of the FEATURE_INFO structure + */ +struct sev_data_snp_feature_info { + u32 length; + u32 ecx_in; + u64 feature_info_paddr; +} __packed; + +/** + * struct feature_info - FEATURE_INFO structure + * + * @eax: output of SNP_FEATURE_INFO command + * @ebx: output of SNP_FEATURE_INFO command + * @ecx: output of SNP_FEATURE_INFO command + * #edx: output of SNP_FEATURE_INFO command + */ +struct snp_feature_info { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** -- cgit v1.2.3 From 45d59bd4a3e0f0475b3646e8b9936d34794e503d Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:27 +0000 Subject: crypto: ccp - Introduce new API interface to indicate SEV-SNP Ciphertext hiding feature Implement an API that checks the overall feature support for SEV-SNP ciphertext hiding. This API verifies both the support of the SEV firmware for the feature and its enablement in the platform's BIOS. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 21 +++++++++++++++++++++ include/linux/psp-sev.h | 5 +++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index a3941254d61f..58c9e040e9ac 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1074,6 +1074,27 @@ static void snp_set_hsave_pa(void *arg) wrmsrq(MSR_VM_HSAVE_PA, 0); } +bool sev_is_snp_ciphertext_hiding_supported(void) +{ + struct psp_device *psp = psp_master; + struct sev_device *sev; + + if (!psp || !psp->sev_data) + return false; + + sev = psp->sev_data; + + /* + * Feature information indicates if CipherTextHiding feature is + * supported by the SEV firmware and additionally platform status + * indicates if CipherTextHiding feature is enabled in the + * Platform BIOS. + */ + return ((sev->snp_feat_info_0.ecx & SNP_CIPHER_TEXT_HIDING_SUPPORTED) && + sev->snp_plat_status.ciphertext_hiding_cap); +} +EXPORT_SYMBOL_GPL(sev_is_snp_ciphertext_hiding_supported); + static int snp_get_platform_data(struct sev_device *sev, int *error) { struct sev_data_snp_feature_info snp_feat_info; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 5fb6ae0f51cc..d83185b4268b 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -843,6 +843,8 @@ struct snp_feature_info { u32 edx; } __packed; +#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** @@ -986,6 +988,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); +bool sev_is_snp_ciphertext_hiding_supported(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -1022,6 +1025,8 @@ static inline void snp_free_firmware_page(void *addr) { } static inline void sev_platform_shutdown(void) { } +static inline bool sev_is_snp_ciphertext_hiding_supported(void) { return false; } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ -- cgit v1.2.3 From c9760b0fca6bfa250c02e14bfe81c542f3626a72 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:55 +0000 Subject: crypto: ccp - Add support to enable CipherTextHiding on SNP_INIT_EX To enable ciphertext hiding, it must be specified in the SNP_INIT_EX command as part of SNP initialization. Modify the sev_platform_init_args structure, which is used as input to sev_platform_init(), to include a field that, when non-zero, indicates that ciphertext hiding should be enabled and specifies the maximum ASID that can be used for an SEV-SNP guest. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 12 +++++++++--- include/linux/psp-sev.h | 10 ++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 58c9e040e9ac..334405461657 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1186,7 +1186,7 @@ static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) return 0; } -static int __sev_snp_init_locked(int *error) +static int __sev_snp_init_locked(int *error, unsigned int max_snp_asid) { struct psp_device *psp = psp_master; struct sev_data_snp_init_ex data; @@ -1247,6 +1247,12 @@ static int __sev_snp_init_locked(int *error) } memset(&data, 0, sizeof(data)); + + if (max_snp_asid) { + data.ciphertext_hiding_en = 1; + data.max_snp_asid = max_snp_asid; + } + data.init_rmp = 1; data.list_paddr_en = 1; data.list_paddr = __psp_pa(snp_range_list); @@ -1433,7 +1439,7 @@ static int _sev_platform_init_locked(struct sev_platform_init_args *args) if (sev->sev_plat_status.state == SEV_STATE_INIT) return 0; - rc = __sev_snp_init_locked(&args->error); + rc = __sev_snp_init_locked(&args->error, args->max_snp_asid); if (rc && rc != -ENODEV) return rc; @@ -1516,7 +1522,7 @@ static int snp_move_to_init_state(struct sev_issue_cmd *argp, bool *shutdown_req { int error, rc; - rc = __sev_snp_init_locked(&error); + rc = __sev_snp_init_locked(&error, 0); if (rc) { argp->error = SEV_RET_INVALID_PLATFORM_STATE; return rc; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d83185b4268b..e0dbcb4b4fd9 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -748,10 +748,13 @@ struct sev_data_snp_guest_request { struct sev_data_snp_init_ex { u32 init_rmp:1; u32 list_paddr_en:1; - u32 rsvd:30; + u32 rapl_dis:1; + u32 ciphertext_hiding_en:1; + u32 rsvd:28; u32 rsvd1; u64 list_paddr; - u8 rsvd2[48]; + u16 max_snp_asid; + u8 rsvd2[46]; } __packed; /** @@ -800,10 +803,13 @@ struct sev_data_snp_shutdown_ex { * @probe: True if this is being called as part of CCP module probe, which * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed * unless psp_init_on_probe module param is set + * @max_snp_asid: When non-zero, enable ciphertext hiding and specify the + * maximum ASID that can be used for an SEV-SNP guest. */ struct sev_platform_init_args { int error; bool probe; + unsigned int max_snp_asid; }; /** -- cgit v1.2.3 From b76c739c3d11d1dacc8efe7fa873bee28ac991f1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 16:52:38 -0500 Subject: iio: fix iio_push_to_buffers_with_ts() typo Replace iio_push_to_buffer_with_ts() with iio_push_to_buffers_with_ts() in some documentation comments in iio.h. The latter is the correct name of the function, the former doesn't exist. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-fix-iio_push_to_buffer_with_ts-typo-v1-1-6ac9efb856d3@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d11668f14a3e..2f5560646ee4 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -779,7 +779,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * them safe for use with non-coherent DMA. * * A number of drivers also use this on buffers that include a 64-bit timestamp - * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * that is used with iio_push_to_buffers_with_ts(). Therefore, in the case where * DMA alignment is not sufficient for proper timestamp alignment, we align to * 8 bytes instead. */ @@ -794,7 +794,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * @name: identifier name of the buffer * @count: number of elements in the buffer * - * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * Declares a buffer that is safe to use with iio_push_to_buffers_with_ts(). In * addition to allocating enough space for @count elements of @type, it also * allocates space for a s64 timestamp at the end of the buffer and ensures * proper alignment of the timestamp. -- cgit v1.2.3 From 4847d1187402a5027d9a04393f12d52a5a1d7f98 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:41 +0200 Subject: console: introduce console_lock guard()s Having this, guards like these work: guard(console_lock)(); or scoped_guard(console_lock) { ... } See e.g. "vc_screen: use guard()s" later in this series. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 8f10d0a85bb4..031a58dc2b91 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -666,6 +666,8 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; +DEFINE_LOCK_GUARD_0(console_lock, console_lock(), console_unlock()); + extern void console_init(void); /* For deferred console takeover */ -- cgit v1.2.3 From e8398b8aed50382c21fcec77e80a5314e7c45c25 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:42 +0200 Subject: tty: introduce tty_port_tty guard() Having this, guards like these work: scoped_guard(tty_port_tty, port) tty_wakeup(scoped_tty()); See e.g. "tty_port: use scoped_guard()" later in this series. The definitions depend on CONFIG_TTY. It's due to tty_kref_put(). On !CONFIG_TTY, it is an inline and its declaration would conflict. The guards are not needed in that case, of course. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 332ddb93603e..660c254f1efe 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -270,4 +270,18 @@ static inline void tty_port_tty_vhangup(struct tty_port *port) __tty_port_tty_hangup(port, false, false); } +#ifdef CONFIG_TTY +void tty_kref_put(struct tty_struct *tty); +__DEFINE_CLASS_IS_CONDITIONAL(tty_port_tty, true); +__DEFINE_UNLOCK_GUARD(tty_port_tty, struct tty_struct, tty_kref_put(_T->lock)); +static inline class_tty_port_tty_t class_tty_port_tty_constructor(struct tty_port *tport) +{ + class_tty_port_tty_t _t = { + .lock = tty_port_tty_get(tport), + }; + return _t; +} +#define scoped_tty() ((struct tty_struct *)(__guard_ptr(tty_port_tty)(&scope))) +#endif + #endif -- cgit v1.2.3 From 0fd60b689b0dacce659253ec15cb3d3bf660e30b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:43 +0200 Subject: serial: introduce uart_port_lock() guard()s Having this, guards like these work: guard(uart_port_lock_irq)(&up->port); or scoped_guard(uart_port_lock_irqsave, port) { ... } See e.g. "serial: 8250: use guard()s" later in this series. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 84b4648ead7e..666430b47899 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -788,6 +788,19 @@ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned lo spin_unlock_irqrestore(&up->lock, flags); } +DEFINE_GUARD(uart_port_lock, struct uart_port *, uart_port_lock(_T), uart_port_unlock(_T)); +DEFINE_GUARD_COND(uart_port_lock, _try, uart_port_trylock(_T)); + +DEFINE_GUARD(uart_port_lock_irq, struct uart_port *, uart_port_lock_irq(_T), + uart_port_unlock_irq(_T)); + +DEFINE_LOCK_GUARD_1(uart_port_lock_irqsave, struct uart_port, + uart_port_lock_irqsave(_T->lock, &_T->flags), + uart_port_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags); +DEFINE_LOCK_GUARD_1_COND(uart_port_lock_irqsave, _try, + uart_port_trylock_irqsave(_T->lock, &_T->flags)); + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); -- cgit v1.2.3 From 292cb391479d50f4379a0abab34324de92c82a92 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 13 Aug 2025 23:03:52 -0700 Subject: software node: Constify node_group in registration functions The software_node_register_node_group() and software_node_unregister_node_group() functions take in essence an array of pointers to software_node structs. Since the functions do not modify the array declare the argument as constant, so that static arrays can be declared as const and annotated as __initconst. Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/2zny5grbgtwbplynxffxg6dkgjgqf45aigwmgxio5stesdr3wi@gf2zamk5amic Signed-off-by: Greg Kroah-Hartman --- drivers/base/swnode.c | 5 ++--- include/linux/property.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index deda7f35a059..be1e9e61a7bf 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -844,7 +844,7 @@ swnode_register(const struct software_node *node, struct swnode *parent, * of this function or by ordering the array such that parent comes before * child. */ -int software_node_register_node_group(const struct software_node **node_group) +int software_node_register_node_group(const struct software_node * const *node_group) { unsigned int i; int ret; @@ -877,8 +877,7 @@ EXPORT_SYMBOL_GPL(software_node_register_node_group); * remove the nodes individually, in the correct order (child before * parent). */ -void software_node_unregister_node_group( - const struct software_node **node_group) +void software_node_unregister_node_group(const struct software_node * const *node_group) { unsigned int i = 0; diff --git a/include/linux/property.h b/include/linux/property.h index 82f0cb3abd1e..d1e80b3c9918 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -574,8 +574,8 @@ const struct software_node * software_node_find_by_name(const struct software_node *parent, const char *name); -int software_node_register_node_group(const struct software_node **node_group); -void software_node_unregister_node_group(const struct software_node **node_group); +int software_node_register_node_group(const struct software_node * const *node_group); +void software_node_unregister_node_group(const struct software_node * const *node_group); int software_node_register(const struct software_node *node); void software_node_unregister(const struct software_node *node); -- cgit v1.2.3 From 894af4a1cde61c3401f237184fb770f72ff12df8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 12 Apr 2025 13:56:01 +0200 Subject: objtool: Validate kCFI calls Validate that all indirect calls adhere to kCFI rules. Notably doing nocfi indirect call to a cfi function is broken. Apparently some Rust 'core' code violates this and explodes when ran with FineIBT. All the ANNOTATE_NOCFI_SYM sites are prime targets for attackers. - runtime EFI is especially henous because it also needs to disable IBT. Basically calling unknown code without CFI protection at runtime is a massice security issue. - Kexec image handover; if you can exploit this, you get to keep it :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Acked-by: Sean Christopherson Link: https://lkml.kernel.org/r/20250714103441.496787279@infradead.org --- arch/x86/kernel/machine_kexec_64.c | 4 ++++ arch/x86/kvm/vmx/vmenter.S | 4 ++++ arch/x86/platform/efi/efi_stub_64.S | 4 ++++ drivers/misc/lkdtm/perms.c | 5 +++++ include/linux/objtool.h | 10 +++++++++ include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 42 +++++++++++++++++++++++++++++++++++++ tools/objtool/include/objtool/elf.h | 1 + 9 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 697fb99406e6..8593760c255a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -453,6 +453,10 @@ void __nocfi machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } +/* + * Handover to the next kernel, no CFI concern. + */ +ANNOTATE_NOCFI_SYM(machine_kexec); /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0a6cf5bff2aa..bc255d709d8a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -361,6 +361,10 @@ SYM_FUNC_END(vmread_error_trampoline) .section .text, "ax" +#ifndef CONFIG_X86_FRED + SYM_FUNC_START(vmx_do_interrupt_irqoff) VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 SYM_FUNC_END(vmx_do_interrupt_irqoff) + +#endif diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 2206b8bc47b8..f0a5fba0717e 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -11,6 +11,10 @@ #include SYM_FUNC_START(__efi_call) + /* + * The EFI code doesn't have any CFI, annotate away the CFI violation. + */ + ANNOTATE_NOCFI_SYM pushq %rbp movq %rsp, %rbp and $~0xf, %rsp diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 6c24426104ba..e1f5e9abb301 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,10 @@ static noinline __nocfi void execute_location(void *dst, bool write) func(); pr_err("FAIL: func returned\n"); } +/* + * Explicitly doing the wrong thing for testing. + */ +ANNOTATE_NOCFI_SYM(execute_location); static void execute_user_location(void *dst) { diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 366ad004d794..46ebaa46e6c5 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -184,6 +184,15 @@ * WARN using UD2. */ #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code. + */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -194,6 +203,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d14f20ef1db1..79eab61cd944 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2392,6 +2392,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { + struct symbol *sym; + switch (type) { case ANNOTYPE_NOENDBR: /* early */ @@ -2433,6 +2435,15 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->dead_end = false; break; + case ANNOTYPE_NOCFI: + sym = insn->sym; + if (!sym) { + ERROR_INSN(insn, "dodgy NOCFI annotation"); + return -1; + } + insn->sym->nocfi = 1; + break; + default: ERROR_INSN(insn, "Unknown annotation type: %d", type); return -1; @@ -4002,6 +4013,37 @@ static int validate_retpoline(struct objtool_file *file) warnings++; } + if (!opts.cfi) + return warnings; + + /* + * kCFI call sites look like: + * + * movl $(-0x12345678), %r10d + * addl -4(%r11), %r10d + * jz 1f + * ud2 + * 1: cs call __x86_indirect_thunk_r11 + * + * Verify all indirect calls are kCFI adorned by checking for the + * UD2. Notably, doing __nocfi calls to regular (cfi) functions is + * broken. + */ + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + struct symbol *sym = insn->sym; + + if (sym && (sym->type == STT_NOTYPE || + sym->type == STT_FUNC) && !sym->nocfi) { + struct instruction *prev = + prev_insn_same_sym(file, insn); + + if (!prev || prev->type != INSN_BUG) { + WARN_INSN(insn, "no-cfi indirect call!"); + warnings++; + } + } + } + return warnings; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 0a2fa3ac0079..df8434d3b744 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -70,6 +70,7 @@ struct symbol { u8 local_label : 1; u8 frame_pointer : 1; u8 ignore : 1; + u8 nocfi : 1; struct list_head pv_target; struct reloc *relocs; struct section *group_sec; -- cgit v1.2.3 From 89d912e494f786e79f69ed9d567a8842c71dbb03 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:27 +0200 Subject: bpf: Add dynptr type for skb metadata Add a dynptr type, similar to skb dynptr, but for the skb metadata access. The dynptr provides an alternative to __sk_buff->data_meta for accessing the custom metadata area allocated using the bpf_xdp_adjust_meta() helper. More importantly, it abstracts away the fact where the storage for the custom metadata lives, which opens up the way to persist the metadata by relocating it as the skb travels through the network stack layers. Writes to skb metadata invalidate any existing skb payload and metadata slices. While this is more restrictive that needed at the moment, it leaves the door open to reallocating the metadata on writes, and should be only a minor inconvenience to the users. Only the program types which can access __sk_buff->data_meta today are allowed to create a dynptr for skb metadata at the moment. We need to modify the network stack to persist the metadata across layers before opening up access to other BPF hooks. Once more BPF hooks gain access to skb_meta dynptr, we will also need to add a read-only variant of the helper similar to bpf_dynptr_from_skb_rdonly. skb_meta dynptr ops are stubbed out and implemented by subsequent changes. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-1-8a39e636e0fb@cloudflare.com --- include/linux/bpf.h | 7 ++++++- kernel/bpf/helpers.c | 7 +++++++ kernel/bpf/log.c | 2 ++ kernel/bpf/verifier.c | 15 +++++++++++++-- net/core/filter.c | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..ec527b476dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -767,12 +767,15 @@ enum bpf_type_flag { */ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ + DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1358,6 +1361,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB, /* Underlying data is a xdp_buff */ BPF_DYNPTR_TYPE_XDP, + /* Points to skb_metadata_end()-skb_metadata_len() */ + BPF_DYNPTR_TYPE_SKB_META, }; int bpf_dynptr_check_size(u32 size); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6b4877e85a68..9552b32208c5 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1780,6 +1780,8 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); case BPF_DYNPTR_TYPE_XDP: return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); + case BPF_DYNPTR_TYPE_SKB_META: + return -EOPNOTSUPP; /* not implemented */ default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -1836,6 +1838,8 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, if (flags) return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); + case BPF_DYNPTR_TYPE_SKB_META: + return -EOPNOTSUPP; /* not implemented */ default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; @@ -1882,6 +1886,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 return (unsigned long)(ptr->data + ptr->offset + offset); case BPF_DYNPTR_TYPE_SKB: case BPF_DYNPTR_TYPE_XDP: + case BPF_DYNPTR_TYPE_SKB_META: /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ return 0; default: @@ -2710,6 +2715,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false); return buffer__opt; } + case BPF_DYNPTR_TYPE_SKB_META: + return NULL; /* not implemented */ default: WARN_ONCE(true, "unknown dynptr type %d\n", type); return NULL; diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 38050f4ee400..e4983c1303e7 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type) return "skb"; case BPF_DYNPTR_TYPE_XDP: return "xdp"; + case BPF_DYNPTR_TYPE_SKB_META: + return "skb_meta"; case BPF_DYNPTR_TYPE_INVALID: return ""; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4f69a9e9af6..5964bed40ffb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -674,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_SKB; case DYNPTR_TYPE_XDP: return BPF_DYNPTR_TYPE_XDP; + case DYNPTR_TYPE_SKB_META: + return BPF_DYNPTR_TYPE_SKB_META; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -690,6 +692,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) return DYNPTR_TYPE_SKB; case BPF_DYNPTR_TYPE_XDP: return DYNPTR_TYPE_XDP; + case BPF_DYNPTR_TYPE_SKB_META: + return DYNPTR_TYPE_SKB_META; default: return 0; } @@ -2274,7 +2278,8 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg) { return base_type(reg->type) == PTR_TO_MEM && - (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP); + (reg->type & + (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)); } /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ @@ -11641,7 +11646,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (dynptr_type == BPF_DYNPTR_TYPE_INVALID) return -EFAULT; - if (dynptr_type == BPF_DYNPTR_TYPE_SKB) + if (dynptr_type == BPF_DYNPTR_TYPE_SKB || + dynptr_type == BPF_DYNPTR_TYPE_SKB_META) /* this will trigger clear_all_pkt_pointers(), which will * invalidate all dynptr slices associated with the skb */ @@ -12228,6 +12234,7 @@ enum special_kfunc_type { KF_bpf_rbtree_right, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, + KF_bpf_dynptr_from_skb_meta, KF_bpf_dynptr_slice, KF_bpf_dynptr_slice_rdwr, KF_bpf_dynptr_clone, @@ -12277,9 +12284,11 @@ BTF_ID(func, bpf_rbtree_right) #ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +BTF_ID(func, bpf_dynptr_from_skb_meta) #else BTF_ID_UNUSED BTF_ID_UNUSED +BTF_ID_UNUSED #endif BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) @@ -13253,6 +13262,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_SKB; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) { dynptr_arg_type |= DYNPTR_TYPE_XDP; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { + dynptr_arg_type |= DYNPTR_TYPE_SKB_META; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; diff --git a/net/core/filter.c b/net/core/filter.c index da391e2b0788..31b4b50dbadf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12007,6 +12007,36 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, return 0; } +/** + * bpf_dynptr_from_skb_meta() - Initialize a dynptr to the skb metadata area. + * @skb_: socket buffer carrying the metadata + * @flags: future use, must be zero + * @ptr__uninit: dynptr to initialize + * + * Set up a dynptr for access to the metadata area earlier allocated from the + * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to + * &__sk_buff->data_meta. + * + * Return: + * * %0 - dynptr ready to use + * * %-EINVAL - invalid flags, dynptr set to null + */ +__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, + struct bpf_dynptr *ptr__uninit) +{ + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct sk_buff *skb = (struct sk_buff *)skb_; + + if (flags) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb)); + + return 0; +} + __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags, struct bpf_dynptr *ptr__uninit) { @@ -12181,6 +12211,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_kfunc_check_set_skb) +BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta) + BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) @@ -12202,6 +12236,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .set = &bpf_kfunc_check_set_skb, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_skb_meta = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_skb_meta, +}; + static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_xdp, @@ -12237,6 +12276,8 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb_meta); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb_meta); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); -- cgit v1.2.3 From 6877cd392baecf816c2ba896a9d42874628004a5 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:28 +0200 Subject: bpf: Enable read/write access to skb metadata through a dynptr Now that we can create a dynptr to skb metadata, make reads to the metadata area possible with bpf_dynptr_read() or through a bpf_dynptr_slice(), and make writes to the metadata area possible with bpf_dynptr_write() or through a bpf_dynptr_slice_rdwr(). Note that for cloned skbs which share data with the original, we limit the skb metadata dynptr to be read-only since we don't unclone on a bpf_dynptr_write to metadata. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-2-8a39e636e0fb@cloudflare.com --- include/linux/filter.h | 6 ++++++ kernel/bpf/helpers.c | 10 +++++++--- net/core/filter.c | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e7fd3ee759e..9ed21b65e2e9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1784,6 +1784,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1818,6 +1819,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi unsigned long len, bool flush) { } + +static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) +{ + return NULL; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9552b32208c5..cdffd74ddbe6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1781,7 +1781,8 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s case BPF_DYNPTR_TYPE_XDP: return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); case BPF_DYNPTR_TYPE_SKB_META: - return -EOPNOTSUPP; /* not implemented */ + memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len); + return 0; default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -1839,7 +1840,10 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); case BPF_DYNPTR_TYPE_SKB_META: - return -EOPNOTSUPP; /* not implemented */ + if (flags) + return -EINVAL; + memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len); + return 0; default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; @@ -2716,7 +2720,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, return buffer__opt; } case BPF_DYNPTR_TYPE_SKB_META: - return NULL; /* not implemented */ + return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset); default: WARN_ONCE(true, "unknown dynptr type %d\n", type); return NULL; diff --git a/net/core/filter.c b/net/core/filter.c index 31b4b50dbadf..63f3baee2daf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11990,6 +11990,16 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return func; } +/** + * bpf_skb_meta_pointer() - Gets a mutable pointer within the skb metadata area. + * @skb: socket buffer carrying the metadata + * @offset: offset into the metadata area, must be <= skb_metadata_len() + */ +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) +{ + return skb_metadata_end(skb) - skb_metadata_len(skb) + offset; +} + __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, struct bpf_dynptr *ptr__uninit) @@ -12017,6 +12027,9 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to * &__sk_buff->data_meta. * + * If passed @skb_ is a clone which shares the data with the original, the + * dynptr will be read-only. This limitation may be lifted in the future. + * * Return: * * %0 - dynptr ready to use * * %-EINVAL - invalid flags, dynptr set to null @@ -12034,6 +12047,9 @@ __bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb)); + if (skb_cloned(skb)) + bpf_dynptr_set_rdonly(ptr); + return 0; } -- cgit v1.2.3 From 807221d3c5ff6e3c91ff57bc82a0b7a541462e20 Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 14:35:21 +0800 Subject: misc: rtsx_pci: Add separate CD/WP pin polarity reversal support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the Card Detect (CD) and Write Protect (WP) pins shared the same reverse polarity setting in the configuration space. This meant both signals were reversed together, without the ability to configure them individually. This patch introduces two new parameters: sd_cd_reverse_en – enable reverse polarity for the CD pin. sd_wp_reverse_en – enable reverse polarity for the WP pin. With this change, the controller can now support: 1.Reversing both CD and WP pins together (original behavior). 2.Reversing CD and WP pins separately (newly added behavior), if supported by the configuration space. This provides greater flexibility when dealing with devices that have independent polarity requirements for CD and WP pins. Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812063521.2427696-1-ricky_wu@realtek.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rts5227.c | 13 ++++++++++--- drivers/misc/cardreader/rts5228.c | 12 ++++++++++-- drivers/misc/cardreader/rts5249.c | 16 +++++++++++++--- drivers/misc/cardreader/rts5264.c | 20 ++++++++++++++++---- drivers/misc/cardreader/rts5264.h | 1 + drivers/misc/cardreader/rtsx_pcr.h | 2 ++ include/linux/rtsx_pci.h | 2 ++ 7 files changed, 54 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index cd512284bfb3..46444bb47f65 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -79,6 +79,10 @@ static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); if (rtsx_reg_check_reverse_socket(reg)) pcr->flags |= PCR_REVERSE_SOCKET; + if (rtsx_reg_check_cd_reverse(reg)) + pcr->option.sd_cd_reverse_en = 1; + if (rtsx_reg_check_wp_reverse(reg)) + pcr->option.sd_wp_reverse_en = 1; } static void rts5227_init_from_cfg(struct rtsx_pcr *pcr) @@ -127,8 +131,10 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) /* Configure force_clock_req */ if (pcr->flags & PCR_REVERSE_SOCKET) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x30); - else - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00); + else { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } if (CHK_PCI_PID(pcr, 0x522A)) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_AUTOLOAD_CFG1, @@ -350,6 +356,8 @@ void rts5227_init_params(struct rtsx_pcr *pcr) pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; pcr->reg_pm_ctrl3 = PM_CTRL3; + pcr->option.sd_cd_reverse_en = 0; + pcr->option.sd_wp_reverse_en = 0; } static int rts522a_optimize_phy(struct rtsx_pcr *pcr) @@ -508,5 +516,4 @@ void rts522a_init_params(struct rtsx_pcr *pcr) pcr->hw_param.interrupt_en |= SD_OC_INT_EN; pcr->hw_param.ocp_glitch = SD_OCP_GLITCH_10M; pcr->option.sd_800mA_ocp_thd = RTS522A_OCP_THD_800; - } diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c index 0c7f10bcf6f1..db7e735ac24f 100644 --- a/drivers/misc/cardreader/rts5228.c +++ b/drivers/misc/cardreader/rts5228.c @@ -84,6 +84,10 @@ static void rtsx5228_fetch_vendor_settings(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); if (rtsx_reg_check_reverse_socket(reg)) pcr->flags |= PCR_REVERSE_SOCKET; + if (rtsx_reg_check_cd_reverse(reg)) + pcr->option.sd_cd_reverse_en = 1; + if (rtsx_reg_check_wp_reverse(reg)) + pcr->option.sd_wp_reverse_en = 1; } static int rts5228_optimize_phy(struct rtsx_pcr *pcr) @@ -432,8 +436,10 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) if (pcr->flags & PCR_REVERSE_SOCKET) rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x30); - else - rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + else { + rtsx_pci_write_register(pcr, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_write_register(pcr, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } /* * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced @@ -720,4 +726,6 @@ void rts5228_init_params(struct rtsx_pcr *pcr) hw_param->interrupt_en |= SD_OC_INT_EN; hw_param->ocp_glitch = SD_OCP_GLITCH_800U; option->sd_800mA_ocp_thd = RTS5228_LDO1_OCP_THD_930; + option->sd_cd_reverse_en = 0; + option->sd_wp_reverse_en = 0; } diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index 6c81040e18be..38aefd8db452 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -60,6 +60,7 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + pci_write_config_dword(pdev, 0x718, 0x0007C000); if (!rtsx_vendor_setting_valid(reg)) { pcr_dbg(pcr, "skip fetch vendor setting\n"); @@ -82,6 +83,10 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); if (rtsx_reg_check_reverse_socket(reg)) pcr->flags |= PCR_REVERSE_SOCKET; + if (rtsx_reg_check_cd_reverse(reg)) + pcr->option.sd_cd_reverse_en = 1; + if (rtsx_reg_check_wp_reverse(reg)) + pcr->option.sd_wp_reverse_en = 1; } static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) @@ -254,9 +259,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) /* Configure driving */ rts5249_fill_driving(pcr, OUTPUT_3V3); if (pcr->flags & PCR_REVERSE_SOCKET) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); - else - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x30); + else { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); @@ -572,6 +579,9 @@ void rts5249_init_params(struct rtsx_pcr *pcr) option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF; option->ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF; + + option->sd_cd_reverse_en = 0; + option->sd_wp_reverse_en = 0; } static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val) diff --git a/drivers/misc/cardreader/rts5264.c b/drivers/misc/cardreader/rts5264.c index d050c9fff7ac..99a2d5ea6421 100644 --- a/drivers/misc/cardreader/rts5264.c +++ b/drivers/misc/cardreader/rts5264.c @@ -527,8 +527,16 @@ static void rts5264_init_from_hw(struct rtsx_pcr *pcr) pcr->rtd3_en = rts5264_reg_to_rtd3(lval2); - if (rts5264_reg_check_reverse_socket(lval2)) - pcr->flags |= PCR_REVERSE_SOCKET; + if (rts5264_reg_check_reverse_socket(lval2)) { + if (is_version_higher_than(pcr, PID_5264, RTS5264_IC_VER_B)) + pcr->option.sd_cd_reverse_en = 1; + else + pcr->flags |= PCR_REVERSE_SOCKET; + } + + if (rts5264_reg_check_wp_reverse(lval2) && + is_version_higher_than(pcr, PID_5264, RTS5264_IC_VER_B)) + pcr->option.sd_wp_reverse_en = 1; pci_read_config_dword(pdev, setting_reg1, &lval1); pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg1, lval1); @@ -622,8 +630,10 @@ static int rts5264_extra_init_hw(struct rtsx_pcr *pcr) if (pcr->flags & PCR_REVERSE_SOCKET) rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x30); - else - rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + else { + rtsx_pci_write_register(pcr, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_write_register(pcr, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } /* * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced @@ -957,4 +967,6 @@ void rts5264_init_params(struct rtsx_pcr *pcr) hw_param->interrupt_en |= (SD_OC_INT_EN | SD_OVP_INT_EN); hw_param->ocp_glitch = SD_OCP_GLITCH_800U | SDVIO_OCP_GLITCH_800U; option->sd_800mA_ocp_thd = RTS5264_LDO1_OCP_THD_1150; + option->sd_cd_reverse_en = 0; + option->sd_wp_reverse_en = 0; } diff --git a/drivers/misc/cardreader/rts5264.h b/drivers/misc/cardreader/rts5264.h index f3e81daa708d..611ee253367c 100644 --- a/drivers/misc/cardreader/rts5264.h +++ b/drivers/misc/cardreader/rts5264.h @@ -14,6 +14,7 @@ #define rts5264_reg_to_aspm(reg) \ (((~(reg) >> 28) & 0x02) | (((reg) >> 28) & 0x01)) #define rts5264_reg_check_reverse_socket(reg) ((reg) & 0x04) +#define rts5264_reg_check_wp_reverse(reg) ((reg) & 0x8000) #define rts5264_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 22) & 0x03) #define rts5264_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 16) & 0x03) #define rts5264_reg_to_rtd3(reg) ((reg) & 0x08) diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index 8e5951b61143..40562ff2be13 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -100,6 +100,8 @@ static inline u8 map_sd_drive(int idx) #define rtsx_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x03) #define rtsx_reg_to_card_drive_sel(reg) ((((reg) >> 25) & 0x01) << 6) #define rtsx_reg_check_reverse_socket(reg) ((reg) & 0x4000) +#define rtsx_reg_check_cd_reverse(reg) ((reg) & 0x800000) +#define rtsx_reg_check_wp_reverse(reg) ((reg) & 0x400000) #define rts5209_reg_to_aspm(reg) (((reg) >> 5) & 0x03) #define rts5209_reg_check_ms_pmos(reg) (!((reg) & 0x08)) #define rts5209_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 3) & 0x07) diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 3b4c36705a9b..3c5689356004 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1160,6 +1160,8 @@ struct rtsx_cr_option { bool ocp_en; u8 sd_400mA_ocp_thd; u8 sd_800mA_ocp_thd; + u8 sd_cd_reverse_en; + u8 sd_wp_reverse_en; }; /* -- cgit v1.2.3 From f5597840ac907858ad2a462b00e4a68fd199121e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:14 +0800 Subject: char: misc: Disallow registering miscdevice whose minor > MISC_DYNAMIC_MINOR Currently, It is allowed to register miscdevice with minor > 255 which is defined by macro MISC_DYNAMIC_MINOR, and cause: - Chaos regarding division and management of minor codes. - Registering failure if the minor was allocated to other dynamic request. Fortunately, in-kernel users have not had such usage yet. Fix by refusing to register miscdevice whose minor > 255. Also bring in a very simple minor code space division and management: < 255 : Fixed minor code == 255 : Indicator to request dynamic minor code > 255 : Dynamic minor code requested, 1048320 minor codes totally And all fixed minors allocated should be registered in 'linux/miscdevice.h' Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-3-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 6 ++++++ include/linux/miscdevice.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 558302a64dd9..b8e66466184f 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -210,6 +210,12 @@ int misc_register(struct miscdevice *misc) int err = 0; bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR); + if (misc->minor > MISC_DYNAMIC_MINOR) { + pr_err("Invalid fixed minor %d for miscdevice '%s'\n", + misc->minor, misc->name); + return -EINVAL; + } + INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3e6deb00fc85..565b88efeb23 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -71,6 +71,14 @@ #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 #define RFKILL_MINOR 242 + +/* + * Misc char device minor code space division related to below macro: + * + * < 255 : Fixed minor code + * == 255 : Indicator to request dynamic minor code + * > 255 : Dynamic minor code requested, 1048320 minor codes totally. + */ #define MISC_DYNAMIC_MINOR 255 struct miscdevice { -- cgit v1.2.3 From d7f8d0758b975db8406c91cf242d46cd9611ba3e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:18 +0800 Subject: char: misc: Register fixed minor EISA_EEPROM_MINOR in linux/miscdevice.h Move fixed minor EISA_EEPROM_MINOR definition to linux/miscdevice.h. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-7-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/eisa_eeprom.c | 2 -- include/linux/miscdevice.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 443b15422fc1..601cbb22574f 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -15,8 +15,6 @@ #include #include -#define EISA_EEPROM_MINOR 241 - static loff_t eisa_eeprom_llseek(struct file *file, loff_t offset, int origin) { return fixed_size_llseek(file, offset, origin, HPEE_MAX_LENGTH); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 565b88efeb23..7d0aa718499c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -70,6 +70,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define EISA_EEPROM_MINOR 241 #define RFKILL_MINOR 242 /* -- cgit v1.2.3 From 1d6249c1ce826fcf03c695973095eb4a50fb7fd2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 11 Aug 2025 11:13:35 +0200 Subject: sysfs: remove bin_attribute::read_new/write_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These transitional fields are now unused and unnecessary. Remove them and their logic in the sysfs core. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250811-sysfs-const-bin_attr-final-v4-1-7b6053fd58bb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 22 +++++----------------- include/linux/sysfs.h | 4 ---- 2 files changed, 5 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1ca143d2f22a..3825e780cc58 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -97,12 +97,9 @@ static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, count = size - pos; } - if (!battr->read && !battr->read_new) + if (!battr->read) return -EIO; - if (battr->read_new) - return battr->read_new(of->file, kobj, battr, buf, pos, count); - return battr->read(of->file, kobj, battr, buf, pos, count); } @@ -161,12 +158,9 @@ static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf, if (!count) return 0; - if (!battr->write && !battr->write_new) + if (!battr->write) return -EIO; - if (battr->write_new) - return battr->write_new(of->file, kobj, battr, buf, pos, count); - return battr->write(of->file, kobj, battr, buf, pos, count); } @@ -335,19 +329,13 @@ int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct kernfs_ops *ops; struct kernfs_node *kn; - if (battr->read && battr->read_new) - return -EINVAL; - - if (battr->write && battr->write_new) - return -EINVAL; - if (battr->mmap) ops = &sysfs_bin_kfops_mmap; - else if ((battr->read || battr->read_new) && (battr->write || battr->write_new)) + else if (battr->read && battr->write) ops = &sysfs_bin_kfops_rw; - else if (battr->read || battr->read_new) + else if (battr->read) ops = &sysfs_bin_kfops_ro; - else if (battr->write || battr->write_new) + else if (battr->write) ops = &sysfs_bin_kfops_wo; else ops = &sysfs_file_kfops_empty; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f418aae4f113..7544f6d81c05 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -308,12 +308,8 @@ struct bin_attribute { struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, - char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*write_new)(struct file *, struct kobject *, - const struct bin_attribute *, char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, -- cgit v1.2.3 From 44d454fcffa8b08d6d66df132121c1d387fa85db Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 11 Aug 2025 11:13:36 +0200 Subject: sysfs: remove attribute_group::bin_attrs_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This transitional field is now unused and unnecessary. Remove it. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250811-sysfs-const-bin_attr-final-v4-2-7b6053fd58bb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 7544f6d81c05..9a25a2911652 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -106,10 +106,7 @@ struct attribute_group { const struct bin_attribute *, int); struct attribute **attrs; - union { - const struct bin_attribute *const *bin_attrs; - const struct bin_attribute *const *bin_attrs_new; - }; + const struct bin_attribute *const *bin_attrs; }; #define SYSFS_PREALLOC 010000 @@ -293,7 +290,7 @@ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .bin_attrs_new = _name##_attrs, \ + .bin_attrs = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) -- cgit v1.2.3 From 74f44ad07d1063933c237a7db16f6a4036643d60 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 30 Jul 2025 17:46:14 +0100 Subject: mmc: tmio: Add 64-bit read/write support for SD_BUF0 in polling mode As per the RZ/{G2L,G3E} HW manual SD_BUF0 can be accessed by 16/32/64 bits. Most of the data transfer in SD/SDIO/eMMC mode is more than 8 bytes. During testing it is found that, if the DMA buffer is not aligned to 128 bit it fallback to PIO mode. In such cases, 64-bit access is much more efficient than the current 16-bit. Tested-by: Wolfram Sang Reviewed-by: Wolfram Sang Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20250730164618.233117-2-biju.das.jz@bp.renesas.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc.h | 15 +++++++++++++++ drivers/mmc/host/tmio_mmc_core.c | 33 +++++++++++++++++++++++++++++++++ include/linux/platform_data/tmio.h | 3 +++ 3 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index d730b7633ae1..c8cdb1c0722e 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -242,6 +243,20 @@ static inline void sd_ctrl_read32_rep(struct tmio_mmc_host *host, int addr, ioread32_rep(host->ctl + (addr << host->bus_shift), buf, count); } +#ifdef CONFIG_64BIT +static inline void sd_ctrl_read64_rep(struct tmio_mmc_host *host, int addr, + u64 *buf, int count) +{ + readsq(host->ctl + (addr << host->bus_shift), buf, count); +} + +static inline void sd_ctrl_write64_rep(struct tmio_mmc_host *host, int addr, + const u64 *buf, int count) +{ + writesq(host->ctl + (addr << host->bus_shift), buf, count); +} +#endif + static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val) { diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 21c2f9095bac..775e0d9353d5 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -349,6 +349,39 @@ static void tmio_mmc_transfer_data(struct tmio_mmc_host *host, /* * Transfer the data */ +#ifdef CONFIG_64BIT + if (host->pdata->flags & TMIO_MMC_64BIT_DATA_PORT) { + u64 *buf64 = (u64 *)buf; + u64 data = 0; + + if (count >= 8) { + if (is_read) + sd_ctrl_read64_rep(host, CTL_SD_DATA_PORT, + buf64, count >> 3); + else + sd_ctrl_write64_rep(host, CTL_SD_DATA_PORT, + buf64, count >> 3); + } + + /* if count was multiple of 8 */ + if (!(count & 0x7)) + return; + + buf64 += count >> 3; + count %= 8; + + if (is_read) { + sd_ctrl_read64_rep(host, CTL_SD_DATA_PORT, &data, 1); + memcpy(buf64, &data, count); + } else { + memcpy(&data, buf64, count); + sd_ctrl_write64_rep(host, CTL_SD_DATA_PORT, &data, 1); + } + + return; + } +#endif + if (host->pdata->flags & TMIO_MMC_32BIT_DATA_PORT) { u32 data = 0; u32 *buf32 = (u32 *)buf; diff --git a/include/linux/platform_data/tmio.h b/include/linux/platform_data/tmio.h index b060124ba1ae..426291713b83 100644 --- a/include/linux/platform_data/tmio.h +++ b/include/linux/platform_data/tmio.h @@ -47,6 +47,9 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) +/* Some controllers have a 64-bit wide data port register */ +#define TMIO_MMC_64BIT_DATA_PORT BIT(12) + struct tmio_mmc_data { void *chan_priv_tx; void *chan_priv_rx; -- cgit v1.2.3 From d2e6fb2c31a07f34e5e7533df11431cb0d2ecf9f Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 11:08:11 +0800 Subject: misc: rtsx: usb card reader: add OCP support This patch adds support for Over Current Protection (OCP) to the Realtek USB card reader driver. The OCP mechanism protects the hardware by detecting and handling current overload conditions. This implementation includes: - Register configurations to enable OCP monitoring. - Handling of OCP interrupt events and associated error reporting. - Card power management changes in response to OCP triggers. This enhancement improves the robustness of the driver when operating in environments where electrical anomalies may occur, particularly with SD and MS card interfaces. Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812030811.2426112-1-ricky_wu@realtek.com Signed-off-by: Ulf Hansson --- drivers/memstick/host/rtsx_usb_ms.c | 5 ++++- drivers/misc/cardreader/rtsx_usb.c | 7 +++++++ drivers/mmc/host/rtsx_usb_sdmmc.c | 33 ++++++++++++++++++++++++++++++--- include/linux/rtsx_usb.h | 11 +++++++++++ 4 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 3878136227e4..9389e9643c24 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -216,7 +216,10 @@ static int ms_power_off(struct rtsx_usb_ms *host) rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_CLK_EN, MS_CLK_EN, 0); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_OE, MS_OUTPUT_EN, 0); - + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + POWER_MASK, POWER_OFF); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + POWER_MASK | LDO3318_PWR_MASK, POWER_OFF | LDO_SUSPEND); err = rtsx_usb_send_cmd(ucr, MODE_C, 100); if (err < 0) return err; diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index d007a4455ce5..1830e9ed2521 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -552,6 +552,10 @@ static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); if (ret) return ret; + /* config OCP */ + rtsx_usb_write_register(ucr, OCPCTL, MS_OCP_DETECT_EN, MS_OCP_DETECT_EN); + rtsx_usb_write_register(ucr, OCPPARA1, 0xF0, 0x50); + rtsx_usb_write_register(ucr, OCPPARA2, 0x7, 0x3); /* config non-crystal mode */ rtsx_usb_read_register(ucr, CFG_MODE, &val); @@ -722,6 +726,9 @@ static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) if (val & (SD_CD | MS_CD)) { device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); return -EAGAIN; + } else { + /* if the card does not exists, clear OCP status */ + rtsx_usb_write_register(ucr, OCPCTL, MS_OCP_CLEAR, MS_OCP_CLEAR); } } else { /* There is an ongoing operation*/ diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index c5f6b9df066b..e1ed39c657c3 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -48,7 +48,7 @@ struct rtsx_usb_sdmmc { bool ddr_mode; unsigned char power_mode; - + u16 ocp_stat; #ifdef RTSX_USB_USE_LEDS_CLASS struct led_classdev led; char led_name[32]; @@ -785,6 +785,9 @@ static int sdmmc_get_cd(struct mmc_host *mmc) mutex_unlock(&ucr->dev_mutex); + /* get OCP status */ + host->ocp_stat = (val >> 4) & 0x03; + /* Treat failed detection as non-exist */ if (err) goto no_card; @@ -795,6 +798,11 @@ static int sdmmc_get_cd(struct mmc_host *mmc) } no_card: + /* clear OCP status */ + if (host->ocp_stat & (MS_OCP_NOW | MS_OCP_EVER)) { + rtsx_usb_write_register(ucr, OCPCTL, MS_OCP_CLEAR, MS_OCP_CLEAR); + host->ocp_stat = 0; + } host->card_exist = false; return 0; } @@ -818,7 +826,11 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq) cmd->error = -ENOMEDIUM; goto finish_detect_card; } - + /* check OCP stat */ + if (host->ocp_stat & (MS_OCP_NOW | MS_OCP_EVER)) { + cmd->error = -ENOMEDIUM; + goto finish_detect_card; + } mutex_lock(&ucr->dev_mutex); mutex_lock(&host->host_mutex); @@ -952,6 +964,10 @@ static int sd_power_on(struct rtsx_usb_sdmmc *host) struct rtsx_ucr *ucr = host->ucr; int err; + if (host->ocp_stat & (MS_OCP_NOW | MS_OCP_EVER)) { + dev_dbg(sdmmc_dev(host), "over current\n"); + return -EIO; + } dev_dbg(sdmmc_dev(host), "%s\n", __func__); rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_SELECT, 0x07, SD_MOD_SEL); @@ -977,9 +993,19 @@ static int sd_power_on(struct rtsx_usb_sdmmc *host) usleep_range(800, 1000); + rtsx_usb_init_cmd(ucr); + /* WA OCP issue: after OCP, there were problems with reopen card power */ + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, POWER_MASK, POWER_ON); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, FPDCTL, SSC_POWER_MASK, SSC_POWER_DOWN); + err = rtsx_usb_send_cmd(ucr, MODE_C, 100); + if (err) + return err; + msleep(20); + rtsx_usb_write_register(ucr, FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); + usleep_range(180, 200); rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, - POWER_MASK|LDO3318_PWR_MASK, POWER_ON|LDO_ON); + LDO3318_PWR_MASK, LDO_ON); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); @@ -1332,6 +1358,7 @@ static void rtsx_usb_init_host(struct rtsx_usb_sdmmc *host) mmc->max_req_size = 524288; host->power_mode = MMC_POWER_OFF; + host->ocp_stat = 0; } static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev) diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index f267a06c6b1e..276b509c03e3 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -99,6 +99,17 @@ extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); #define CD_MASK (SD_CD | MS_CD | XD_CD) #define SD_WP 0x08 +/* OCPCTL */ +#define MS_OCP_DETECT_EN 0x08 +#define MS_OCP_INT_EN 0x04 +#define MS_OCP_INT_CLR 0x02 +#define MS_OCP_CLEAR 0x01 + +/* OCPSTAT */ +#define MS_OCP_DETECT 0x80 +#define MS_OCP_NOW 0x02 +#define MS_OCP_EVER 0x01 + /* reader command field offset & parameters */ #define READ_REG_CMD 0 #define WRITE_REG_CMD 1 -- cgit v1.2.3 From 99e6cc80d5ce5af5781f84d20e4f3478d66ee8ee Mon Sep 17 00:00:00 2001 From: Benoît Monin Date: Mon, 18 Aug 2025 16:02:50 +0200 Subject: mmc: core: add mmc_read_tuning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a function to the MMC hosts to read some blocks of data as part of their tuning. This function only returns the status of the read operation, not the data read. Signed-off-by: Benoît Monin Link: https://lore.kernel.org/r/20250818-mobileye-emmc-for-upstream-4-v4-5-34ecb3995e96@bootlin.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_ops.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 1 + 2 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 66283825513c..a952cc8265af 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -1077,3 +1077,75 @@ int mmc_sanitize(struct mmc_card *card, unsigned int timeout_ms) return err; } EXPORT_SYMBOL_GPL(mmc_sanitize); + +/** + * mmc_read_tuning() - read data blocks from the mmc + * @host: mmc host doing the read + * @blksz: data block size + * @blocks: number of blocks to read + * + * Read one or more blocks of data from the beginning of the mmc. This is a + * low-level helper for tuning operation. It is assumed that CMD23 can be used + * for multi-block read if the host supports it. + * + * Note: Allocate and free a temporary buffer to store the data read. The data + * is not available outside of the function, only the status of the read + * operation. + * + * Return: 0 in case of success, otherwise -EIO / -ENOMEM / -E2BIG + */ +int mmc_read_tuning(struct mmc_host *host, unsigned int blksz, unsigned int blocks) +{ + struct mmc_request mrq = {}; + struct mmc_command sbc = {}; + struct mmc_command cmd = {}; + struct mmc_command stop = {}; + struct mmc_data data = {}; + struct scatterlist sg; + void *buf; + unsigned int len; + + if (blocks > 1) { + if (mmc_host_can_cmd23(host)) { + mrq.sbc = &sbc; + sbc.opcode = MMC_SET_BLOCK_COUNT; + sbc.arg = blocks; + sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; + } + cmd.opcode = MMC_READ_MULTIPLE_BLOCK; + mrq.stop = &stop; + stop.opcode = MMC_STOP_TRANSMISSION; + stop.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC; + } else { + cmd.opcode = MMC_READ_SINGLE_BLOCK; + } + + mrq.cmd = &cmd; + cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; + + mrq.data = &data; + data.flags = MMC_DATA_READ; + data.blksz = blksz; + data.blocks = blocks; + data.blk_addr = 0; + data.sg = &sg; + data.sg_len = 1; + data.timeout_ns = 1000000000; + + if (check_mul_overflow(blksz, blocks, &len)) + return -E2BIG; + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + sg_init_one(&sg, buf, len); + + mmc_wait_for_req(host, &mrq); + kfree(buf); + + if (sbc.error || cmd.error || data.error) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(mmc_read_tuning); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 68f09a955a90..5ed5d203de23 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -743,5 +743,6 @@ int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); +int mmc_read_tuning(struct mmc_host *host, unsigned int blksz, unsigned int blocks); #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3 From c3f0c02997c7f8489fec259e28e0e04e9811edac Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Aug 2025 08:40:26 -0700 Subject: net: Add skb_dstref_steal and skb_dstref_restore Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set to prevent potential leaks. There are few places that still manually manage dst_entry not using the helpers. Convert them to the following new helpers: - skb_dstref_steal that resets dst_entry and returns previous dst_entry value - skb_dstref_restore that restores dst_entry previously reset via skb_dstref_steal Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250818154032.3173645-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..7538ca507ee9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,38 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +/** + * skb_dstref_steal() - return current dst_entry value and clear it + * @skb: buffer + * + * Resets skb dst_entry without adjusting its reference count. Useful in + * cases where dst_entry needs to be temporarily reset and restored. + * Note that the returned value cannot be used directly because it + * might contain SKB_DST_NOREF bit. + * + * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly + * handle dst_entry reference counting. + * + * Returns: original skb dst_entry. + */ +static inline unsigned long skb_dstref_steal(struct sk_buff *skb) +{ + unsigned long refdst = skb->_skb_refdst; + + skb->_skb_refdst = 0; + return refdst; +} + +/** + * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal() + * @skb: buffer + * @refdst: dst entry from a call to skb_dstref_steal() + */ +static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) +{ + skb->_skb_refdst = refdst; +} + /** * skb_dst_set - sets skb dst * @skb: buffer -- cgit v1.2.3 From a890348adcc993f48d1ae38f1174dc8de4c3c5ac Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Aug 2025 08:40:32 -0700 Subject: net: Add skb_dst_check_unset To prevent dst_entry leaks, add warning when the non-NULL dst_entry is rewritten. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250818154032.3173645-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7538ca507ee9..ca8be45dd8be 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,12 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +static inline void skb_dst_check_unset(struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) && + !(skb->_skb_refdst & SKB_DST_NOREF)); +} + /** * skb_dstref_steal() - return current dst_entry value and clear it * @skb: buffer @@ -1188,6 +1194,7 @@ static inline unsigned long skb_dstref_steal(struct sk_buff *skb) */ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) { + skb_dst_check_unset(skb); skb->_skb_refdst = refdst; } @@ -1201,6 +1208,7 @@ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1217,6 +1225,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; -- cgit v1.2.3 From 68889dfd547bd8eabc5a98b58475d7b901cf5129 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:09 +0000 Subject: mptcp: Fix up subflow's memcg when CONFIG_SOCK_CGROUP_DATA=n. When sk_alloc() allocates a socket, mem_cgroup_sk_alloc() sets sk->sk_memcg based on the current task. MPTCP subflow socket creation is triggered from userspace or an in-kernel worker. In the latter case, sk->sk_memcg is not what we want. So, we fix it up from the parent socket's sk->sk_memcg in mptcp_attach_cgroup(). Although the code is placed under #ifdef CONFIG_MEMCG, it is buried under #ifdef CONFIG_SOCK_CGROUP_DATA. The two configs are orthogonal. If CONFIG_MEMCG is enabled without CONFIG_SOCK_CGROUP_DATA, the subflow's memory usage is not charged correctly. Let's move the code out of the wrong ifdef guard. Note that sk->sk_memcg is freed in sk_prot_free() and the parent sk holds the refcnt of memcg->css here, so we don't need to use css_tryget(). Fixes: 3764b0c5651e3 ("mptcp: attach subflow socket to parent cgroup") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Matthieu Baerts (NGI0) Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 6 ++++++ mm/memcontrol.c | 13 +++++++++++++ net/mptcp/subflow.c | 11 +++-------- 3 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..25921fbec685 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1604,6 +1604,7 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1661,6 +1662,11 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg); #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8dd7fbed5a94..46713b9ece06 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5024,6 +5024,19 @@ void mem_cgroup_sk_free(struct sock *sk) css_put(&sk->sk_memcg->css); } +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ + if (sk->sk_memcg == newsk->sk_memcg) + return; + + mem_cgroup_sk_free(newsk); + + if (sk->sk_memcg) + css_get(&sk->sk_memcg->css); + + newsk->sk_memcg = sk->sk_memcg; +} + /** * mem_cgroup_charge_skmem - charge socket memory * @memcg: memcg to charge diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3f1b62a9fe88..c8a7e4b59db1 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1717,19 +1717,14 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child) /* only the additional subflows created by kworkers have to be modified */ if (cgroup_id(sock_cgroup_ptr(parent_skcd)) != cgroup_id(sock_cgroup_ptr(child_skcd))) { -#ifdef CONFIG_MEMCG - struct mem_cgroup *memcg = parent->sk_memcg; - - mem_cgroup_sk_free(child); - if (memcg && css_tryget(&memcg->css)) - child->sk_memcg = memcg; -#endif /* CONFIG_MEMCG */ - cgroup_sk_free(child_skcd); *child_skcd = *parent_skcd; cgroup_sk_clone(child_skcd); } #endif /* CONFIG_SOCK_CGROUP_DATA */ + + if (mem_cgroup_sockets_enabled) + mem_cgroup_sk_inherit(parent, child); } static void mptcp_subflow_ops_override(struct sock *ssk) -- cgit v1.2.3 From bb178c6bc08525d758a57775458d644304011bf8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:16 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_(un)?charge(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_charge_skmem() and mem_cgroup_uncharge_skmem(). Let's pass struct sock to the functions. While at it, they are renamed to match other functions starting with mem_cgroup_sk_. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 29 ++++++++++++++++++++++++----- mm/memcontrol.c | 18 +++++++++++------- net/core/sock.c | 24 +++++++++++------------- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp_output.c | 3 +-- 5 files changed, 48 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 25921fbec685..0837d3de3a68 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1596,15 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask); -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) + void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask); +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1660,13 +1661,31 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 -static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; -static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_alloc(struct sock *sk) +{ +} + +static inline void mem_cgroup_sk_free(struct sock *sk) +{ +} static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) { } +static inline bool mem_cgroup_sk_charge(const struct sock *sk, + unsigned int nr_pages, + gfp_t gfp_mask) +{ + return false; +} + +static inline void mem_cgroup_sk_uncharge(const struct sock *sk, + unsigned int nr_pages) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d8a52d1d08fa..df3e9205c9e6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5043,17 +5043,19 @@ void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) } /** - * mem_cgroup_charge_skmem - charge socket memory - * @memcg: memcg to charge + * mem_cgroup_sk_charge - charge socket memory + * @sk: socket in memcg to charge * @nr_pages: number of pages to charge * @gfp_mask: reclaim mode * * Charges @nr_pages to @memcg. Returns %true if the charge fit within * @memcg's configured limit, %false if it doesn't. */ -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask) +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask) { + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return memcg1_charge_skmem(memcg, nr_pages, gfp_mask); @@ -5066,12 +5068,14 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, } /** - * mem_cgroup_uncharge_skmem - uncharge socket memory - * @memcg: memcg to uncharge + * mem_cgroup_sk_uncharge - uncharge socket memory + * @sk: socket in memcg to uncharge * @nr_pages: number of pages to uncharge */ -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages) { + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { memcg1_uncharge_skmem(memcg, nr_pages); return; diff --git a/net/core/sock.c b/net/core/sock.c index ab658fe23e1e..5537ca263858 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1041,8 +1041,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) pages = sk_mem_pages(bytes); /* pre-charge to memcg */ - charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + charged = mem_cgroup_sk_charge(sk, pages, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!charged) return -ENOMEM; @@ -1054,7 +1054,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes) */ if (allocated > sk_prot_mem_limits(sk, 1)) { sk_memory_allocated_sub(sk, pages); - mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); + mem_cgroup_sk_uncharge(sk, pages); return -ENOMEM; } sk_forward_alloc_add(sk, pages << PAGE_SHIFT); @@ -3263,17 +3263,16 @@ EXPORT_SYMBOL(sk_wait_data); */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { + bool memcg_enabled = false, charged = false; struct proto *prot = sk->sk_prot; - struct mem_cgroup *memcg = NULL; - bool charged = false; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); if (mem_cgroup_sk_enabled(sk)) { - memcg = sk->sk_memcg; - charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()); + memcg_enabled = true; + charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge()); if (!charged) goto suppress_allocation; } @@ -3347,10 +3346,9 @@ suppress_allocation: */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ - if (memcg && !charged) { - mem_cgroup_charge_skmem(memcg, amt, - gfp_memcg_charge() | __GFP_NOFAIL); - } + if (memcg_enabled && !charged) + mem_cgroup_sk_charge(sk, amt, + gfp_memcg_charge() | __GFP_NOFAIL); return 1; } } @@ -3360,7 +3358,7 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); if (charged) - mem_cgroup_uncharge_skmem(memcg, amt); + mem_cgroup_sk_uncharge(sk, amt); return 0; } @@ -3399,7 +3397,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) sk_memory_allocated_sub(sk, amount); if (mem_cgroup_sk_enabled(sk)) - mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); + mem_cgroup_sk_uncharge(sk, amount); if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 93569bbe00f4..0ef1eacd539d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -727,7 +727,7 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg) } if (amt) - mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp); + mem_cgroup_sk_charge(newsk, amt, gfp); kmem_cache_charge(newsk, gfp); release_sock(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 37fb320e6f70..dfbac0876d96 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3579,8 +3579,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk_memory_allocated_add(sk, amt); if (mem_cgroup_sk_enabled(sk)) - mem_cgroup_charge_skmem(sk->sk_memcg, amt, - gfp_memcg_charge() | __GFP_NOFAIL); + mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL); } /* Send a FIN. The caller locks the socket for us. -- cgit v1.2.3 From b2ffd10cddde47cc6830e4981e91e3215def62b1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:17 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_under_memory_pressure(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_under_socket_pressure(). Let's pass struct sock to it and rename the function to match other functions starting with mem_cgroup_sk_. Note that the helper is moved to sock.h to use mem_cgroup_from_sk(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 18 ------------------ include/net/proto_memory.h | 2 +- include/net/sock.h | 22 ++++++++++++++++++++++ include/net/tcp.h | 2 +- 4 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0837d3de3a68..fb27e3d2fdac 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1642,19 +1642,6 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) } #endif -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ -#ifdef CONFIG_MEMCG_V1 - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - return !!memcg->tcpmem_pressure; -#endif /* CONFIG_MEMCG_V1 */ - do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) - return true; - } while ((memcg = parent_mem_cgroup(memcg))); - return false; -} - int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); @@ -1686,11 +1673,6 @@ static inline void mem_cgroup_sk_uncharge(const struct sock *sk, { } -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ - return false; -} - static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index 859e63de81c4..8e91a8fa31b5 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -32,7 +32,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return false; if (mem_cgroup_sk_enabled(sk) && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + mem_cgroup_sk_under_memory_pressure(sk)) return true; return !!READ_ONCE(*sk->sk_prot->memory_pressure); diff --git a/include/net/sock.h b/include/net/sock.h index 3efdf680401d..3bc4d566f7d0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2604,6 +2604,23 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk) { return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk); } + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + +#ifdef CONFIG_MEMCG_V1 + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; +#endif /* CONFIG_MEMCG_V1 */ + + do { + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) + return true; + } while ((memcg = parent_mem_cgroup(memcg))); + + return false; +} #else static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { @@ -2614,6 +2631,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk) { return false; } + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + return false; +} #endif static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f01b6be6444..2936b8175950 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -276,7 +276,7 @@ extern unsigned long tcp_memory_pressure; static inline bool tcp_under_memory_pressure(const struct sock *sk) { if (mem_cgroup_sk_enabled(sk) && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + mem_cgroup_sk_under_memory_pressure(sk)) return true; return READ_ONCE(tcp_memory_pressure); -- cgit v1.2.3 From 8fc6056dcf79937c46c97fa4996cda65956437a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Aug 2025 10:44:31 +0800 Subject: f2fs: fix to detect potential corrupted nid in free_nid_list As reported, on-disk footer.ino and footer.nid is the same and out-of-range, let's add sanity check on f2fs_alloc_nid() to detect any potential corruption in free_nid_list. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 17 ++++++++++++++++- include/linux/f2fs_fs.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 27743b93e186..286e8a53f8e7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -27,12 +27,17 @@ static struct kmem_cache *free_nid_slab; static struct kmem_cache *nat_entry_set_slab; static struct kmem_cache *fsync_node_entry_slab; +static inline bool is_invalid_nid(struct f2fs_sb_info *sbi, nid_t nid) +{ + return nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid; +} + /* * Check whether the given nid is within node id range. */ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { - if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { + if (unlikely(is_invalid_nid(sbi, nid))) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", __func__, nid); @@ -2634,6 +2639,16 @@ retry: f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); i = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); + + if (unlikely(is_invalid_nid(sbi, i->nid))) { + spin_unlock(&nm_i->nid_list_lock); + f2fs_err(sbi, "Corrupted nid %u in free_nid_list", + i->nid); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_NID); + return false; + } + *nid = i->nid; __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2f8b8bfc0e73..6afb4a13b81d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -79,6 +79,7 @@ enum stop_cp_reason { STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_CORRUPTED_FREE_BITMAP, + STOP_CP_REASON_CORRUPTED_NID, STOP_CP_REASON_MAX, }; -- cgit v1.2.3 From 5f8a4f34f6dcf4b64c3cbcfd4aa5a8d7dbcd268d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 19 Aug 2025 09:39:15 -0700 Subject: bnxt_en: hsi: Update FW interface to 1.10.3.133 The major change is struct pcie_ctx_hw_stats_v2 which has new latency histograms added. Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250819163919.104075-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- include/linux/bnxt/hsi.h | 315 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 253 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 549231703bce..8c5dac3b3ef3 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -276,6 +276,10 @@ struct cmd_nums { #define HWRM_REG_POWER_QUERY 0xe1UL #define HWRM_CORE_FREQUENCY_QUERY 0xe2UL #define HWRM_REG_POWER_HISTOGRAM 0xe3UL + #define HWRM_MONITOR_PAX_HISTOGRAM_START 0xe4UL + #define HWRM_MONITOR_PAX_HISTOGRAM_COLLECT 0xe5UL + #define HWRM_STAT_QUERY_ROCE_STATS 0xe6UL + #define HWRM_STAT_QUERY_ROCE_STATS_EXT 0xe7UL #define HWRM_WOL_FILTER_ALLOC 0xf0UL #define HWRM_WOL_FILTER_FREE 0xf1UL #define HWRM_WOL_FILTER_QCFG 0xf2UL @@ -407,9 +411,8 @@ struct cmd_nums { #define HWRM_FUNC_LAG_UPDATE 0x1b1UL #define HWRM_FUNC_LAG_FREE 0x1b2UL #define HWRM_FUNC_LAG_QCFG 0x1b3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL + #define HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY 0x1c3UL + #define HWRM_FUNC_TTX_PACING_RATE_QUERY 0x1c4UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -441,6 +444,7 @@ struct cmd_nums { #define HWRM_MFG_WRITE_CERT_NVM 0x21cUL #define HWRM_PORT_POE_CFG 0x230UL #define HWRM_PORT_POE_QCFG 0x231UL + #define HWRM_PORT_PHY_FDRSTAT 0x232UL #define HWRM_UDCC_QCAPS 0x258UL #define HWRM_UDCC_CFG 0x259UL #define HWRM_UDCC_QCFG 0x25aUL @@ -453,6 +457,8 @@ struct cmd_nums { #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL + #define HWRM_QUEUE_ADPTV_QOS_RX_QCFG 0x264UL + #define HWRM_QUEUE_ADPTV_QOS_TX_QCFG 0x265UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -551,6 +557,8 @@ struct cmd_nums { #define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL #define HWRM_DBG_PTRACE 0xff2dUL #define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL + #define HWRM_DBG_TOKEN_QUERY_AUTH_IDS 0xff2fUL + #define HWRM_DBG_TOKEN_CFG 0xff30UL #define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL #define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL #define HWRM_NVM_DEFRAG 0xffecUL @@ -632,8 +640,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 97 -#define HWRM_VERSION_STR "1.10.3.97" +#define HWRM_VERSION_RSVD 133 +#define HWRM_VERSION_STR "1.10.3.133" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -688,6 +696,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL + #define VER_GET_RESP_DEV_CAPS_CFG_DEBUG_TOKEN_SUPPORTED 0x20000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -872,7 +881,8 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL #define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL #define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ADPTV_QOS 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x52UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1344,7 +1354,8 @@ struct hwrm_async_event_cmpl_dbg_buf_producer { #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA2_TRACE 0x9UL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP1_TRACE 0xaUL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE 0xcUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE }; /* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */ @@ -1401,7 +1412,11 @@ struct hwrm_async_event_cmpl_error_report_base { #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUP_UDCC_SES 0x7UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DB_DROP 0x8UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MD_TEMP 0x9UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR 0xaUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1914,6 +1929,12 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_CHANGE_UDP_SRCPORT_SUPPORT 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_COMPLIANCE_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MULTI_L2_DB_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_SECURE_ATS_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MBUF_STATS_SUPPORTED 0x800UL __le16 max_roce_vfs; __le16 max_crypto_rx_flow_filters; u8 unused_3[3]; @@ -1931,7 +1952,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:1344b/168B) */ +/* hwrm_func_qcfg_output (size:1408b/176B) */ struct hwrm_func_qcfg_output { __le16 error_code; __le16 req_type; @@ -2124,7 +2145,43 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL __le16 mirror_vnic_id; - u8 unused_7[7]; + u8 max_link_width; + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_LAST FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 + u8 max_link_speed; + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_LAST FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 + u8 negotiated_link_width; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 + u8 negotiated_link_speed; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 + u8 unused_7[2]; + u8 pcie_compliance; + u8 unused_8; + __le16 l2_db_multi_page_size_kb; + u8 unused_9[5]; u8 valid; }; @@ -2322,6 +2379,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF 0x200UL #define FUNC_CFG_REQ_ENABLES2_XID_PARTITION_CFG 0x400UL #define FUNC_CFG_REQ_ENABLES2_PHYSICAL_SLOT_NUMBER 0x800UL + #define FUNC_CFG_REQ_ENABLES2_PCIE_COMPLIANCE 0x1000UL u8 port_kdnet_mode; #define FUNC_CFG_REQ_PORT_KDNET_MODE_DISABLED 0x0UL #define FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED 0x1UL @@ -2353,7 +2411,8 @@ struct hwrm_func_cfg_input { __le16 xid_partition_cfg; #define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL - __le16 unused_2; + u8 pcie_compliance; + u8 unused_2; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -2370,11 +2429,41 @@ struct hwrm_func_cfg_output { struct hwrm_func_cfg_cmd_err { u8 code; #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL - #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_OUT_OF_RANGE 0x1UL + #define FUNC_CFG_CMD_ERR_CODE_NPAR_PARTITION_DOWN_FAILED 0x2UL + #define FUNC_CFG_CMD_ERR_CODE_TPID_SET_DFLT_VLAN_NOT_SET 0x3UL + #define FUNC_CFG_CMD_ERR_CODE_RES_ARRAY_ALLOC_FAILED 0x4UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_ASSET_TEST_FAILED 0x5UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_RES_UPDATE_FAILED 0x6UL + #define FUNC_CFG_CMD_ERR_CODE_APPLY_MAX_BW_FAILED 0x7UL + #define FUNC_CFG_CMD_ERR_CODE_ENABLE_EVB_FAILED 0x8UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_ASSET_TEST_FAILED 0x9UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_RES_UPDATE_FAILED 0xaUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_ASSET_TEST_FAILED 0xbUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_RES_UPDATE_FAILED 0xcUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_ASSET_TEST_FAILED 0xdUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_RES_UPDATE_FAILED 0xeUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_ASSET_TEST_FAILED 0xfUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_RES_UPDATE_FAILED 0x10UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_ASSET_TEST_FAILED 0x11UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_RES_UPDATE_FAILED 0x12UL + #define FUNC_CFG_CMD_ERR_CODE_FAILED_TO_START_STATS_THREAD 0x13UL + #define FUNC_CFG_CMD_ERR_CODE_RDMA_SRIOV_DISABLED 0x14UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_DISABLED 0x15UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_ASSET_TEST_FAILED 0x16UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_RES_UPDATE_FAILED 0x17UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_DISABLED 0x18UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_ASSET_TEST_FAILED 0x19UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_RES_UPDATE_FAILED 0x1aUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_DISABLED 0x1bUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_ASSET_TEST_FAILED 0x1cUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_RES_UPDATE_FAILED 0x1dUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_DISABLED 0x1eUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_ASSET_TEST_FAILED 0x1fUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_RES_UPDATE_FAILED 0x20UL + #define FUNC_CFG_CMD_ERR_CODE_INVALID_KDNET_MODE 0x21UL + #define FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL 0x22UL + #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL u8 unused_0[7]; }; @@ -3780,6 +3869,7 @@ struct hwrm_func_backing_store_cfg_v2_input { #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3865,6 +3955,7 @@ struct hwrm_func_backing_store_qcfg_v2_input { #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3904,6 +3995,7 @@ struct hwrm_func_backing_store_qcfg_v2_output { #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ERR_QPC_TRACE 0x2aUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID __le16 instance; @@ -4027,6 +4119,7 @@ struct hwrm_func_backing_store_qcaps_v2_input { #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID u8 rsvd[6]; @@ -4070,6 +4163,7 @@ struct hwrm_func_backing_store_qcaps_v2_output { #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID __le16 entry_size; @@ -4216,6 +4310,10 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_ENABLE 0x800000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_DISABLE 0x1000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_ENABLE 0x2000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_DISABLE 0x4000000UL __le32 enables; #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL @@ -4703,6 +4801,8 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SPEEDS2_SUPPORTED 0x4UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_LINK_TRAINING 0x8UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_PRECODING 0x10UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -4725,6 +4825,10 @@ struct hwrm_port_phy_qcfg_output { u8 link_down_reason; #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_RF 0x1UL #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_OTP_SPEED_VIOLATION 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_CABLE_REMOVED 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_MODULE_FAULT 0x8UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_BMC_REQUEST 0x10UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_TX_LASER_DISABLED 0x20UL __le16 support_speeds2; #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_1GB 0x1UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_10GB 0x2UL @@ -5882,9 +5986,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led0_color_caps; - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led1_id; u8 led1_type; #define PORT_LED_QCAPS_RESP_LED1_TYPE_SPEED 0x0UL @@ -5900,9 +6005,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led1_color_caps; - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led2_id; u8 led2_type; #define PORT_LED_QCAPS_RESP_LED2_TYPE_SPEED 0x0UL @@ -5918,9 +6024,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led2_color_caps; - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led3_id; u8 led3_type; #define PORT_LED_QCAPS_RESP_LED3_TYPE_SPEED 0x0UL @@ -5936,9 +6043,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led3_color_caps; - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 unused_4[3]; u8 valid; }; @@ -7036,9 +7144,22 @@ struct hwrm_vnic_rss_cfg_output { /* hwrm_vnic_rss_cfg_cmd_err (size:64b/8B) */ struct hwrm_vnic_rss_cfg_cmd_err { u8 code; - #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNABLE_TO_GET_RSS_CFG 0x2UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_UNSUPPORTED 0x3UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_ERR 0x4UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_MODE_FAIL 0x5UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RING_GRP_TABLE_ALLOC_ERR 0x6UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_KEY_ALLOC_ERR 0x7UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_DMA_FAILED 0x8UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RX_RING_ALLOC_ERR 0x9UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CMPL_RING_ALLOC_ERR 0xaUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HW_SET_RSS_FAILED 0xbUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CTX_INVALID 0xcUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_INVALID 0xdUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID 0xeUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID u8 unused_0[7]; }; @@ -7177,7 +7298,7 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output { u8 valid; }; -/* hwrm_ring_alloc_input (size:704b/88B) */ +/* hwrm_ring_alloc_input (size:768b/96B) */ struct hwrm_ring_alloc_input { __le16 req_type; __le16 cmpl_ring; @@ -7195,6 +7316,7 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL #define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL + #define RING_ALLOC_REQ_ENABLES_DPI_VALID 0x2000UL u8 ring_type; #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL @@ -7287,6 +7409,8 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE u8 unused_4; __le64 cq_handle; + __le16 dpi; + __le16 unused_5[3]; }; /* hwrm_ring_alloc_output (size:128b/16B) */ @@ -7776,7 +7900,10 @@ struct hwrm_cfa_l2_set_rx_mask_cmd_err { u8 code; #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL - #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_MAX_VLAN_TAGS 0x2UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_VNIC_ID 0x3UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION 0x4UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION u8 unused_0[7]; }; @@ -8109,9 +8236,38 @@ struct hwrm_cfa_ntuple_filter_alloc_output { /* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */ struct hwrm_cfa_ntuple_filter_alloc_cmd_err { u8 code; - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_MAC 0x65UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_BC_MC_MAC 0x66UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_VNIC 0x67UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_PF_FID 0x68UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L2_CTXT_ID 0x69UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_CTXT_CFG 0x6aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_DATA_FLD 0x6bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_CFA_LAYOUT 0x6cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_CTXT_ALLOC_FAIL 0x6dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ROCE_FLOW_ERR 0x6eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_OWNER_FID 0x6fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_REF_CNT 0x70UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_FLOW_TYPE 0x71UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_IVLAN 0x72UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_MAX_VLAN_ID 0x73UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_TNL_REQ 0x74UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_ADDR 0x75UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_IVLAN 0x76UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR 0x77UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR_TYPE 0x78UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_T_L3_ADDR_TYPE 0x79UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DST_VNIC_ID 0x7aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VNI 0x7bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_DST_ID 0x7cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_FAIL_ROCE_L2_FLOW 0x7dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_NPAR_VLAN 0x7eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ATSP_ADD 0x7fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DFLT_VLAN_FAIL 0x80UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L3_TYPE 0x81UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW 0x82UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW u8 unused_0[7]; }; @@ -9181,7 +9337,7 @@ struct pcie_ctx_hw_stats { __le64 pcie_recovery_histogram; }; -/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */ +/* pcie_ctx_hw_stats_v2 (size:4544b/568B) */ struct pcie_ctx_hw_stats_v2 { __le64 pcie_pl_signal_integrity; __le64 pcie_dl_signal_integrity; @@ -9212,6 +9368,9 @@ struct pcie_ctx_hw_stats_v2 { __le64 pcie_other_packet_count; __le64 pcie_blocked_packet_count; __le64 pcie_cmpl_packet_count; + __le32 pcie_rd_latency_histogram[12]; + __le32 pcie_rd_latency_all_normal_count; + __le32 unused_2; }; /* hwrm_stat_generic_qstats_input (size:256b/32B) */ @@ -9406,7 +9565,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_COUNT 0x12cUL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND 0x12dUL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND + #define STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS 0x190UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS __le16 len; u8 version; #define STRUCT_HDR_VERSION_0 0x0UL @@ -9459,11 +9619,13 @@ struct hwrm_fw_set_structured_data_output { /* hwrm_fw_set_structured_data_cmd_err (size:64b/8B) */ struct hwrm_fw_set_structured_data_cmd_err { u8 code; - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_ALREADY_ADDED 0x4UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG 0x5UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG u8 unused_0[7]; }; @@ -9487,7 +9649,9 @@ struct hwrm_fw_get_structured_data_input { #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER 0x201UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL 0x300UL - #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_SUPPORTED 0x320UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE 0x321UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE u8 count; u8 unused_0; }; @@ -10172,7 +10336,8 @@ struct hwrm_dbg_log_buffer_flush_input { #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE 0x9UL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE 0xaUL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE 0xcUL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE u8 unused_1[2]; __le32 flags; #define DBG_LOG_BUFFER_FLUSH_REQ_FLAGS_FLUSH_ALL_BUFFERS 0x1UL @@ -10295,10 +10460,15 @@ struct hwrm_nvm_write_output { /* hwrm_nvm_write_cmd_err (size:64b/8B) */ struct hwrm_nvm_write_cmd_err { u8 code; - #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL - #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL - #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_NO_SPACE + #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL + #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL + #define NVM_WRITE_CMD_ERR_CODE_WRITE_FAILED 0x3UL + #define NVM_WRITE_CMD_ERR_CODE_REQD_ERASE_FAILED 0x4UL + #define NVM_WRITE_CMD_ERR_CODE_VERIFY_FAILED 0x5UL + #define NVM_WRITE_CMD_ERR_CODE_INVALID_HEADER 0x6UL + #define NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED 0x7UL + #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED u8 unused_0[7]; }; @@ -10438,7 +10608,11 @@ struct hwrm_nvm_get_dev_info_output { __le16 srt2_fw_minor; __le16 srt2_fw_build; __le16 srt2_fw_patch; - u8 unused_0[7]; + u8 security_soc_fw_major; + u8 security_soc_fw_minor; + u8 security_soc_fw_build; + u8 security_soc_fw_patch; + u8 unused_0[3]; u8 valid; }; @@ -10568,7 +10742,9 @@ struct hwrm_nvm_install_update_cmd_err { #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT 0x4UL - #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_DEFRAG_FAILED 0x5UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR 0x6UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR u8 unused_0[7]; }; @@ -10591,7 +10767,8 @@ struct hwrm_nvm_get_variable_input { __le16 index_2; __le16 index_3; u8 flags; - #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_VALIDATE_OPT_VALUE 0x2UL u8 unused_0; }; @@ -10606,18 +10783,25 @@ struct hwrm_nvm_get_variable_output { #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_LAST NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF - u8 unused_0[3]; + u8 flags; + #define NVM_GET_VARIABLE_RESP_FLAGS_VALIDATE_OPT_VALUE 0x1UL + u8 unused_0[2]; u8 valid; }; /* hwrm_nvm_get_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_get_variable_cmd_err { u8 code; - #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x4UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x5UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x6UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x7UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x8UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; @@ -10667,10 +10851,17 @@ struct hwrm_nvm_set_variable_output { /* hwrm_nvm_set_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_set_variable_cmd_err { u8 code; - #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR + #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACTION_NOT_SUPPORTED 0x4UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x5UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x6UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x7UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x8UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x9UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; -- cgit v1.2.3 From 5a774b64cd6a008f016119782a5d3f30ed0bf3b7 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 18 Aug 2025 09:51:21 +0200 Subject: net: phy: micrel: Add support for lan8842 The LAN8842 is a low-power, single port triple-speed (10BASE-T/ 100BASE-TX/ 1000BASE-T) ethernet physical layer transceiver (PHY) that supports transmission and reception of data on standard CAT-5, as well as CAT-5e and CAT-6, Unshielded Twisted Pair (UTP) cables. The LAN8842 supports industry-standard SGMII (Serial Gigabit Media Independent Interface) providing chip-to-chip connection to a Gigabit Ethernet MAC using a single serialized link (differential pair) in each direction. There are 2 variants of the lan8842. The one that supports timestamping (lan8842) and one that doesn't have timestamping (lan8832). Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250818075121.1298170-5-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/phy/micrel.c | 209 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/micrel_phy.h | 1 + 2 files changed, 210 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index c621ed465d2e..04bd744920b0 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -448,6 +448,17 @@ struct kszphy_priv { struct kszphy_phy_stats phy_stats; }; +struct lan8842_phy_stats { + u64 rx_packets; + u64 rx_errors; + u64 tx_packets; + u64 tx_errors; +}; + +struct lan8842_priv { + struct lan8842_phy_stats phy_stats; +}; + static const struct kszphy_type lan8814_type = { .led_mode_reg = ~LAN8814_LED_CTRL_1, .cable_diag_reg = LAN8814_CABLE_DIAG, @@ -5768,6 +5779,188 @@ static int ksz9131_resume(struct phy_device *phydev) return kszphy_resume(phydev); } +#define LAN8842_SELF_TEST 14 /* 0x0e */ +#define LAN8842_SELF_TEST_RX_CNT_ENA BIT(8) +#define LAN8842_SELF_TEST_TX_CNT_ENA BIT(4) + +static int lan8842_probe(struct phy_device *phydev) +{ + struct lan8842_priv *priv; + int ret; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + + /* Similar to lan8814 this PHY has a pin which needs to be pulled down + * to enable to pass any traffic through it. Therefore use the same + * function as lan8814 + */ + ret = lan8814_release_coma_mode(phydev); + if (ret) + return ret; + + /* Enable to count the RX and TX packets */ + ret = lanphy_write_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, + LAN8842_SELF_TEST, + LAN8842_SELF_TEST_RX_CNT_ENA | + LAN8842_SELF_TEST_TX_CNT_ENA); + if (ret < 0) + return ret; + + return 0; +} + +static int lan8842_config_init(struct phy_device *phydev) +{ + int ret; + + /* Reset the PHY */ + ret = lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + if (ret < 0) + return ret; + + /* To allow the PHY to control the LEDs the GPIOs of the PHY should have + * a function mode and not the GPIO. Apparently by default the value is + * GPIO and not function even though the datasheet it says that it is + * function. Therefore set this value. + */ + return lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_GPIO_EN2, 0); +} + +#define LAN8842_INTR_CTRL_REG 52 /* 0x34 */ + +static int lan8842_config_intr(struct phy_device *phydev) +{ + int err; + + lanphy_write_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8842_INTR_CTRL_REG, + LAN8814_INTR_CTRL_REG_INTR_ENABLE); + + /* enable / disable interrupts */ + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = lan8814_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write(phydev, LAN8814_INTC, LAN8814_INT_LINK); + } else { + err = phy_write(phydev, LAN8814_INTC, 0); + if (err) + return err; + + err = lan8814_ack_interrupt(phydev); + } + + return err; +} + +static unsigned int lan8842_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + /* Inband configuration can be enabled or disabled using the registers + * PCS1G_ANEG_CONFIG. + */ + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; +} + +static int lan8842_config_inband(struct phy_device *phydev, unsigned int modes) +{ + bool enable; + + if (modes == LINK_INBAND_DISABLE) + enable = false; + else + enable = true; + + /* Disable or enable in-band autoneg with PCS Host side + * It has the same address as lan8814 + */ + return lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, + LAN8814_QSGMII_PCS1G_ANEG_CONFIG, + LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA, + enable ? LAN8814_QSGMII_PCS1G_ANEG_CONFIG_ANEG_ENA : 0); +} + +static irqreturn_t lan8842_handle_interrupt(struct phy_device *phydev) +{ + int ret = IRQ_NONE; + int irq_status; + + irq_status = phy_read(phydev, LAN8814_INTS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (irq_status & LAN8814_INT_LINK) { + phy_trigger_machine(phydev); + ret = IRQ_HANDLED; + } + + return ret; +} + +static u64 lan8842_get_stat(struct phy_device *phydev, int count, int *regs) +{ + u64 ret = 0; + int val; + + for (int j = 0; j < count; ++j) { + val = lanphy_read_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, + regs[j]); + if (val < 0) + return U64_MAX; + + ret <<= 16; + ret += val; + } + return ret; +} + +static int lan8842_update_stats(struct phy_device *phydev) +{ + struct lan8842_priv *priv = phydev->priv; + int rx_packets_regs[] = {88, 61, 60}; + int rx_errors_regs[] = {63, 62}; + int tx_packets_regs[] = {89, 85, 84}; + int tx_errors_regs[] = {87, 86}; + + priv->phy_stats.rx_packets = lan8842_get_stat(phydev, + ARRAY_SIZE(rx_packets_regs), + rx_packets_regs); + priv->phy_stats.rx_errors = lan8842_get_stat(phydev, + ARRAY_SIZE(rx_errors_regs), + rx_errors_regs); + priv->phy_stats.tx_packets = lan8842_get_stat(phydev, + ARRAY_SIZE(tx_packets_regs), + tx_packets_regs); + priv->phy_stats.tx_errors = lan8842_get_stat(phydev, + ARRAY_SIZE(tx_errors_regs), + tx_errors_regs); + + return 0; +} + +static void lan8842_get_phy_stats(struct phy_device *phydev, + struct ethtool_eth_phy_stats *eth_stats, + struct ethtool_phy_stats *stats) +{ + struct lan8842_priv *priv = phydev->priv; + + stats->rx_packets = priv->phy_stats.rx_packets; + stats->rx_errors = priv->phy_stats.rx_errors; + stats->tx_packets = priv->phy_stats.tx_packets; + stats->tx_errors = priv->phy_stats.tx_errors; +} + static struct phy_driver ksphy_driver[] = { { PHY_ID_MATCH_MODEL(PHY_ID_KS8737), @@ -5987,6 +6180,21 @@ static struct phy_driver ksphy_driver[] = { .resume = lan8841_resume, .cable_test_start = lan8814_cable_test_start, .cable_test_get_status = ksz886x_cable_test_get_status, +}, { + PHY_ID_MATCH_MODEL(PHY_ID_LAN8842), + .name = "Microchip LAN8842 Gigabit PHY", + .flags = PHY_POLL_CABLE_TEST, + .driver_data = &lan8814_type, + .probe = lan8842_probe, + .config_init = lan8842_config_init, + .config_intr = lan8842_config_intr, + .inband_caps = lan8842_inband_caps, + .config_inband = lan8842_config_inband, + .handle_interrupt = lan8842_handle_interrupt, + .get_phy_stats = lan8842_get_phy_stats, + .update_stats = lan8842_update_stats, + .cable_test_start = lan8814_cable_test_start, + .cable_test_get_status = ksz886x_cable_test_get_status, }, { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131), .name = "Microchip KSZ9131 Gigabit PHY", @@ -6082,6 +6290,7 @@ static const struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_LAN8814) }, { PHY_ID_MATCH_MODEL(PHY_ID_LAN8804) }, { PHY_ID_MATCH_MODEL(PHY_ID_LAN8841) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN8842) }, { } }; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9af01bdd86d2..ca691641788b 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -32,6 +32,7 @@ #define PHY_ID_LAN8814 0x00221660 #define PHY_ID_LAN8804 0x00221670 #define PHY_ID_LAN8841 0x00221650 +#define PHY_ID_LAN8842 0x002216C0 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From 6c9468aad215a198742c8375b0415e42521c905c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:56:54 -0700 Subject: fscrypt: replace raw loads of info pointer with helper function Add and use a helper function fscrypt_get_inode_info_raw(). It loads an inode's fscrypt info pointer using a raw dereference, which is appropriate when the caller knows the key setup already happened. This eliminates most occurrences of inode::i_crypt_info in the source, in preparation for replacing that with a filesystem-specific field. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-2-ebiggers@kernel.org Signed-off-by: Christian Brauner --- fs/crypto/bio.c | 2 +- fs/crypto/crypto.c | 14 ++++++++------ fs/crypto/fname.c | 11 ++++++----- fs/crypto/hooks.c | 2 +- fs/crypto/inline_crypt.c | 12 +++++++----- fs/crypto/policy.c | 7 ++++--- include/linux/fscrypt.h | 16 ++++++++++++++++ 7 files changed, 43 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 486fcb2ecf13..0d746de4cd10 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -113,7 +113,7 @@ out: int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { - const struct fscrypt_inode_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits; diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index b6ccab524fde..07f9cbfe3ea4 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -173,7 +173,7 @@ struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs, gfp_t gfp_flags) { const struct inode *inode = folio->mapping->host; - const struct fscrypt_inode_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; struct page *ciphertext_page; @@ -232,8 +232,9 @@ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, { if (WARN_ON_ONCE(inode->i_sb->s_cop->supports_subblock_data_units)) return -EOPNOTSUPP; - return fscrypt_crypt_data_unit(inode->i_crypt_info, FS_ENCRYPT, - lblk_num, page, page, len, offs); + return fscrypt_crypt_data_unit(fscrypt_get_inode_info_raw(inode), + FS_ENCRYPT, lblk_num, page, page, len, + offs); } EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); @@ -255,7 +256,7 @@ int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs) { const struct inode *inode = folio->mapping->host; - const struct fscrypt_inode_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; u64 index = ((u64)folio->index << (PAGE_SHIFT - du_bits)) + @@ -305,8 +306,9 @@ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, { if (WARN_ON_ONCE(inode->i_sb->s_cop->supports_subblock_data_units)) return -EOPNOTSUPP; - return fscrypt_crypt_data_unit(inode->i_crypt_info, FS_DECRYPT, - lblk_num, page, page, len, offs); + return fscrypt_crypt_data_unit(fscrypt_get_inode_info_raw(inode), + FS_DECRYPT, lblk_num, page, page, len, + offs); } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index f9f6713e144f..fb77ad1ca74a 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -94,7 +94,7 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen) { - const struct fscrypt_inode_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); struct crypto_sync_skcipher *tfm = ci->ci_enc_key.tfm; SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); union fscrypt_iv iv; @@ -138,7 +138,7 @@ static int fname_decrypt(const struct inode *inode, const struct fscrypt_str *iname, struct fscrypt_str *oname) { - const struct fscrypt_inode_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); struct crypto_sync_skcipher *tfm = ci->ci_enc_key.tfm; SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); union fscrypt_iv iv; @@ -274,8 +274,9 @@ bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret) { - return __fscrypt_fname_encrypted_size(&inode->i_crypt_info->ci_policy, - orig_len, max_len, + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); + + return __fscrypt_fname_encrypted_size(&ci->ci_policy, orig_len, max_len, encrypted_len_ret); } EXPORT_SYMBOL_GPL(fscrypt_fname_encrypted_size); @@ -543,7 +544,7 @@ EXPORT_SYMBOL_GPL(fscrypt_match_name); */ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) { - const struct fscrypt_inode_info *ci = dir->i_crypt_info; + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(dir); WARN_ON_ONCE(!ci->ci_dirhash_key_initialized); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index e0b32ac841f7..7a5d4c168c49 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -199,7 +199,7 @@ int fscrypt_prepare_setflags(struct inode *inode, err = fscrypt_require_key(inode); if (err) return err; - ci = inode->i_crypt_info; + ci = fscrypt_get_inode_info_raw(inode); if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; mk = ci->ci_master_key; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index caaff809765b..5dee7c498bc8 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -263,7 +263,7 @@ int fscrypt_derive_sw_secret(struct super_block *sb, bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) { - return inode->i_crypt_info->ci_inlinecrypt; + return fscrypt_get_inode_info_raw(inode)->ci_inlinecrypt; } EXPORT_SYMBOL_GPL(__fscrypt_inode_uses_inline_crypto); @@ -307,7 +307,7 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, if (!fscrypt_inode_uses_inline_crypto(inode)) return; - ci = inode->i_crypt_info; + ci = fscrypt_get_inode_info_raw(inode); fscrypt_generate_dun(ci, first_lblk, dun); bio_crypt_set_ctx(bio, ci->ci_enc_key.blk_key, dun, gfp_mask); @@ -385,22 +385,24 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk) { const struct bio_crypt_ctx *bc = bio->bi_crypt_context; + const struct fscrypt_inode_info *ci; u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; if (!!bc != fscrypt_inode_uses_inline_crypto(inode)) return false; if (!bc) return true; + ci = fscrypt_get_inode_info_raw(inode); /* * Comparing the key pointers is good enough, as all I/O for each key * uses the same pointer. I.e., there's currently no need to support * merging requests where the keys are the same but the pointers differ. */ - if (bc->bc_key != inode->i_crypt_info->ci_enc_key.blk_key) + if (bc->bc_key != ci->ci_enc_key.blk_key) return false; - fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun); + fscrypt_generate_dun(ci, next_lblk, next_dun); return bio_crypt_dun_is_contiguous(bc, bio->bi_iter.bi_size, next_dun); } EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio); @@ -502,7 +504,7 @@ u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) if (nr_blocks <= 1) return nr_blocks; - ci = inode->i_crypt_info; + ci = fscrypt_get_inode_info_raw(inode); if (!(fscrypt_policy_flags(&ci->ci_policy) & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) return nr_blocks; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 6ad30ae07c06..9d51f3500de3 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -727,7 +727,7 @@ const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir) err = fscrypt_require_key(dir); if (err) return ERR_PTR(err); - return &dir->i_crypt_info->ci_policy; + return &fscrypt_get_inode_info_raw(dir)->ci_policy; } return fscrypt_get_dummy_policy(dir->i_sb); @@ -746,7 +746,7 @@ const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir) */ int fscrypt_context_for_new_inode(void *ctx, struct inode *inode) { - struct fscrypt_inode_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); BUILD_BUG_ON(sizeof(union fscrypt_context) != FSCRYPT_SET_CONTEXT_MAX_SIZE); @@ -771,7 +771,7 @@ EXPORT_SYMBOL_GPL(fscrypt_context_for_new_inode); */ int fscrypt_set_context(struct inode *inode, void *fs_data) { - struct fscrypt_inode_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci; union fscrypt_context ctx; int ctxsize; @@ -783,6 +783,7 @@ int fscrypt_set_context(struct inode *inode, void *fs_data) * This may be the first time the inode number is available, so do any * delayed key setup that requires the inode number. */ + ci = fscrypt_get_inode_info_raw(inode); if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) fscrypt_hash_inode_number(ci, ci->ci_master_key); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 10dd161690a2..23c5198612d1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -195,6 +195,22 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +/* + * Load the inode's fscrypt info pointer, using a raw dereference. Since this + * uses a raw dereference with no memory barrier, it is appropriate to use only + * when the caller knows the inode's key setup already happened, resulting in + * non-NULL fscrypt info. E.g., the file contents en/decryption functions use + * this, since fscrypt_file_open() set up the key. + */ +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info_raw(const struct inode *inode) +{ + struct fscrypt_inode_info *ci = inode->i_crypt_info; + + VFS_WARN_ON_ONCE(ci == NULL); + return ci; +} + static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { -- cgit v1.2.3 From 93221de31a8df6710e02328f82dc68d7ab4ad9e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:56:55 -0700 Subject: fscrypt: add support for info in fs-specific part of inode Add an inode_info_offs field to struct fscrypt_operations, and update fs/crypto/ to support it. When set to a nonzero value, it specifies the offset to the fscrypt_inode_info pointer within the filesystem-specific part of the inode structure, to be used instead of inode::i_crypt_info. Since this makes inode::i_crypt_info no longer necessarily used, update comments that mentioned it. This is a prerequisite for a later commit that removes inode::i_crypt_info, saving memory and improving cache efficiency with filesystems that don't support fscrypt. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-3-ebiggers@kernel.org Signed-off-by: Christian Brauner --- fs/crypto/fscrypt_private.h | 4 ++-- fs/crypto/keysetup.c | 43 ++++++++++++++++++++++++++----------------- include/linux/fscrypt.h | 22 ++++++++++++++++++---- 3 files changed, 46 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index d8b485b9881c..245e6b84aa17 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -249,8 +249,8 @@ struct fscrypt_prepared_key { * fscrypt_inode_info - the "encryption key" for an inode * * When an encrypted file's key is made available, an instance of this struct is - * allocated and stored in ->i_crypt_info. Once created, it remains until the - * inode is evicted. + * allocated and a pointer to it is stored in the file's in-memory inode. Once + * created, it remains until the inode is evicted. */ struct fscrypt_inode_info { diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 4f3b9ecbfe4e..c1f85715c276 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -642,15 +642,16 @@ fscrypt_setup_encryption_info(struct inode *inode, goto out; /* - * For existing inodes, multiple tasks may race to set ->i_crypt_info. - * So use cmpxchg_release(). This pairs with the smp_load_acquire() in - * fscrypt_get_inode_info(). I.e., here we publish ->i_crypt_info with - * a RELEASE barrier so that other tasks can ACQUIRE it. + * For existing inodes, multiple tasks may race to set the inode's + * fscrypt info pointer. So use cmpxchg_release(). This pairs with the + * smp_load_acquire() in fscrypt_get_inode_info(). I.e., publish the + * pointer with a RELEASE barrier so that other tasks can ACQUIRE it. */ - if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) { + if (cmpxchg_release(fscrypt_inode_info_addr(inode), NULL, crypt_info) == + NULL) { /* - * We won the race and set ->i_crypt_info to our crypt_info. - * Now link it into the master key's inode list. + * We won the race and set the inode's fscrypt info to our + * crypt_info. Now link it into the master key's inode list. */ if (mk) { crypt_info->ci_master_key = mk; @@ -681,13 +682,13 @@ out: * %false unless the operation being performed is needed in * order for files (or directories) to be deleted. * - * Set up ->i_crypt_info, if it hasn't already been done. + * Set up the inode's encryption key, if it hasn't already been done. * - * Note: unless ->i_crypt_info is already set, this isn't %GFP_NOFS-safe. So + * Note: unless the key setup was already done, this isn't %GFP_NOFS-safe. So * generally this shouldn't be called from within a filesystem transaction. * - * Return: 0 if ->i_crypt_info was set or was already set, *or* if the - * encryption key is unavailable. (Use fscrypt_has_encryption_key() to + * Return: 0 if the key is now set up, *or* if it couldn't be set up because the + * needed master key is absent. (Use fscrypt_has_encryption_key() to * distinguish these cases.) Also can return another -errno code. */ int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported) @@ -741,9 +742,9 @@ int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported) * ->i_ino doesn't need to be set yet. * @encrypt_ret: (output) set to %true if the new inode will be encrypted * - * If the directory is encrypted, set up its ->i_crypt_info in preparation for + * If the directory is encrypted, set up its encryption key in preparation for * encrypting the name of the new file. Also, if the new inode will be - * encrypted, set up its ->i_crypt_info and set *encrypt_ret=true. + * encrypted, set up its encryption key too and set *encrypt_ret=true. * * This isn't %GFP_NOFS-safe, and therefore it should be called before starting * any filesystem transaction to create the inode. For this reason, ->i_ino @@ -752,8 +753,8 @@ int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported) * This doesn't persist the new inode's encryption context. That still needs to * be done later by calling fscrypt_set_context(). * - * Return: 0 on success, -ENOKEY if the encryption key is missing, or another - * -errno code + * Return: 0 on success, -ENOKEY if a key needs to be set up for @dir or @inode + * but the needed master key is absent, or another -errno code */ int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret) @@ -800,8 +801,16 @@ EXPORT_SYMBOL_GPL(fscrypt_prepare_new_inode); */ void fscrypt_put_encryption_info(struct inode *inode) { - put_crypt_info(inode->i_crypt_info); - inode->i_crypt_info = NULL; + /* + * Ideally we'd start with a lightweight IS_ENCRYPTED() check here + * before proceeding to retrieve and check the pointer. However, during + * inode creation, the fscrypt_inode_info is set before S_ENCRYPTED. If + * an error occurs, it needs to be cleaned up regardless. + */ + struct fscrypt_inode_info **ci_addr = fscrypt_inode_info_addr(inode); + + put_crypt_info(*ci_addr); + *ci_addr = NULL; } EXPORT_SYMBOL(fscrypt_put_encryption_info); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 23c5198612d1..d7ff53accbfe 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -61,6 +61,12 @@ struct fscrypt_name { /* Crypto operations for filesystems */ struct fscrypt_operations { + /* + * The offset of the pointer to struct fscrypt_inode_info in the + * filesystem-specific part of the inode, relative to the beginning of + * the common part of the inode (the 'struct inode'). + */ + ptrdiff_t inode_info_offs; /* * If set, then fs/crypto/ will allocate a global bounce page pool the @@ -195,6 +201,14 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +static inline struct fscrypt_inode_info ** +fscrypt_inode_info_addr(const struct inode *inode) +{ + if (inode->i_sb->s_cop->inode_info_offs == 0) + return (struct fscrypt_inode_info **)&inode->i_crypt_info; + return (void *)inode + inode->i_sb->s_cop->inode_info_offs; +} + /* * Load the inode's fscrypt info pointer, using a raw dereference. Since this * uses a raw dereference with no memory barrier, it is appropriate to use only @@ -205,7 +219,7 @@ int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, static inline struct fscrypt_inode_info * fscrypt_get_inode_info_raw(const struct inode *inode) { - struct fscrypt_inode_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci = *fscrypt_inode_info_addr(inode); VFS_WARN_ON_ONCE(ci == NULL); return ci; @@ -216,11 +230,11 @@ fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). - * I.e., another task may publish ->i_crypt_info concurrently, executing - * a RELEASE barrier. We need to use smp_load_acquire() here to safely + * I.e., another task may publish the fscrypt info concurrently, + * executing a RELEASE barrier. Use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. */ - return smp_load_acquire(&inode->i_crypt_info); + return smp_load_acquire(fscrypt_inode_info_addr(inode)); } /** -- cgit v1.2.3 From ab90c2d2476c4dd6deddd089c7e83b858d135783 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:00 -0700 Subject: fs: remove inode::i_crypt_info Now that all fscrypt-capable filesystems store the pointer to fscrypt_inode_info in the filesystem-specific part of the inode structure, inode::i_crypt_info is no longer needed. Update fscrypt_inode_info_addr() to no longer support the fallback to inode::i_crypt_info. Finally, remove inode::i_crypt_info itself along with the now-unnecessary forward declaration of fscrypt_inode_info. The end result of the migration to the filesystem-specific pointer is memory savings on CONFIG_FS_ENCRYPTION=y kernels for all filesystems that don't support fscrypt. Specifically, their in-memory inodes are now smaller by the size of a pointer: either 4 or 8 bytes. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-8-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ----- include/linux/fscrypt.h | 8 ++++++-- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..1dafa18169be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,7 +72,6 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -780,10 +779,6 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_inode_info *i_crypt_info; -#endif - #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d7ff53accbfe..516aba5b858b 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -201,11 +201,15 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +/* + * Returns the address of the fscrypt info pointer within the + * filesystem-specific part of the inode. (To save memory on filesystems that + * don't support fscrypt, a field in 'struct inode' itself is no longer used.) + */ static inline struct fscrypt_inode_info ** fscrypt_inode_info_addr(const struct inode *inode) { - if (inode->i_sb->s_cop->inode_info_offs == 0) - return (struct fscrypt_inode_info **)&inode->i_crypt_info; + VFS_WARN_ON_ONCE(inode->i_sb->s_cop->inode_info_offs == 0); return (void *)inode + inode->i_sb->s_cop->inode_info_offs; } -- cgit v1.2.3 From 2a7349add18e5915cd87251af5f98db1772b6131 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:01 -0700 Subject: fsverity: add support for info in fs-specific part of inode Add an inode_info_offs field to struct fsverity_operations, and update fs/verity/ to support it. When set to a nonzero value, it specifies the offset to the fsverity_info pointer within the filesystem-specific part of the inode structure, to be used instead of inode::i_verity_info. Since this makes inode::i_verity_info no longer necessarily used, update comments that mentioned it. This is a prerequisite for a later commit that removes inode::i_verity_info, saving memory and improving cache efficiency on filesystems that don't support fsverity. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-9-ebiggers@kernel.org Signed-off-by: Christian Brauner --- fs/verity/enable.c | 6 +++--- fs/verity/fsverity_private.h | 9 +++++---- fs/verity/open.c | 23 ++++++++++++----------- fs/verity/verify.c | 2 +- include/linux/fsverity.h | 44 ++++++++++++++++++++++++++++++++++---------- 5 files changed, 55 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 503268cf4296..89eccc4becf9 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -284,9 +284,9 @@ static int enable_verity(struct file *filp, /* Successfully enabled verity */ /* - * Readers can start using ->i_verity_info immediately, so it - * can't be rolled back once set. So don't set it until just - * after the filesystem has successfully enabled verity. + * Readers can start using the inode's verity info immediately, + * so it can't be rolled back once set. So don't set it until + * just after the filesystem has successfully enabled verity. */ fsverity_set_info(inode, vi); } diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index 5fe854a5b9ad..bc1d887c532e 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -63,10 +63,11 @@ struct merkle_tree_params { * fsverity_info - cached verity metadata for an inode * * When a verity file is first opened, an instance of this struct is allocated - * and stored in ->i_verity_info; it remains until the inode is evicted. It - * caches information about the Merkle tree that's needed to efficiently verify - * data read from the file. It also caches the file digest. The Merkle tree - * pages themselves are not cached here, but the filesystem may cache them. + * and a pointer to it is stored in the file's in-memory inode. It remains + * until the inode is evicted. It caches information about the Merkle tree + * that's needed to efficiently verify data read from the file. It also caches + * the file digest. The Merkle tree pages themselves are not cached here, but + * the filesystem may cache them. */ struct fsverity_info { struct merkle_tree_params tree_params; diff --git a/fs/verity/open.c b/fs/verity/open.c index c561e130cd0c..77b1c977af02 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -244,17 +244,17 @@ fail: void fsverity_set_info(struct inode *inode, struct fsverity_info *vi) { /* - * Multiple tasks may race to set ->i_verity_info, so use - * cmpxchg_release(). This pairs with the smp_load_acquire() in - * fsverity_get_info(). I.e., here we publish ->i_verity_info with a - * RELEASE barrier so that other tasks can ACQUIRE it. + * Multiple tasks may race to set the inode's verity info pointer, so + * use cmpxchg_release(). This pairs with the smp_load_acquire() in + * fsverity_get_info(). I.e., publish the pointer with a RELEASE + * barrier so that other tasks can ACQUIRE it. */ - if (cmpxchg_release(&inode->i_verity_info, NULL, vi) != NULL) { - /* Lost the race, so free the fsverity_info we allocated. */ + if (cmpxchg_release(fsverity_info_addr(inode), NULL, vi) != NULL) { + /* Lost the race, so free the verity info we allocated. */ fsverity_free_info(vi); /* - * Afterwards, the caller may access ->i_verity_info directly, - * so make sure to ACQUIRE the winning fsverity_info. + * Afterwards, the caller may access the inode's verity info + * directly, so make sure to ACQUIRE the winning verity info. */ (void)fsverity_get_info(inode); } @@ -350,7 +350,6 @@ int fsverity_get_descriptor(struct inode *inode, return 0; } -/* Ensure the inode has an ->i_verity_info */ static int ensure_verity_info(struct inode *inode) { struct fsverity_info *vi = fsverity_get_info(inode); @@ -395,8 +394,10 @@ EXPORT_SYMBOL_GPL(__fsverity_prepare_setattr); void __fsverity_cleanup_inode(struct inode *inode) { - fsverity_free_info(inode->i_verity_info); - inode->i_verity_info = NULL; + struct fsverity_info **vi_addr = fsverity_info_addr(inode); + + fsverity_free_info(*vi_addr); + *vi_addr = NULL; } EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode); diff --git a/fs/verity/verify.c b/fs/verity/verify.c index a1f00c3fd3b2..affc307eb6a6 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -245,7 +245,7 @@ verify_data_blocks(struct folio *data_folio, size_t len, size_t offset, unsigned long max_ra_pages) { struct inode *inode = data_folio->mapping->host; - struct fsverity_info *vi = inode->i_verity_info; + struct fsverity_info *vi = *fsverity_info_addr(inode); const unsigned int block_size = vi->tree_params.block_size; u64 pos = (u64)data_folio->index << PAGE_SHIFT; diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 1eb7eae580be..e0f132cb7839 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -28,6 +28,12 @@ /* Verity operations for filesystems */ struct fsverity_operations { + /** + * The offset of the pointer to struct fsverity_info in the + * filesystem-specific part of the inode, relative to the beginning of + * the common part of the inode (the 'struct inode'). + */ + ptrdiff_t inode_info_offs; /** * Begin enabling verity on the given file. @@ -124,15 +130,33 @@ struct fsverity_operations { #ifdef CONFIG_FS_VERITY +static inline struct fsverity_info ** +fsverity_info_addr(const struct inode *inode) +{ + if (inode->i_sb->s_vop->inode_info_offs == 0) + return (struct fsverity_info **)&inode->i_verity_info; + return (void *)inode + inode->i_sb->s_vop->inode_info_offs; +} + static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { /* - * Pairs with the cmpxchg_release() in fsverity_set_info(). - * I.e., another task may publish ->i_verity_info concurrently, - * executing a RELEASE barrier. We need to use smp_load_acquire() here - * to safely ACQUIRE the memory the other task published. + * Since this function can be called on inodes belonging to filesystems + * that don't support fsverity at all, and fsverity_info_addr() doesn't + * work on such filesystems, we have to start with an IS_VERITY() check. + * Checking IS_VERITY() here is also useful to minimize the overhead of + * fsverity_active() on non-verity files. + */ + if (!IS_VERITY(inode)) + return NULL; + + /* + * Pairs with the cmpxchg_release() in fsverity_set_info(). I.e., + * another task may publish the inode's verity info concurrently, + * executing a RELEASE barrier. Use smp_load_acquire() here to safely + * ACQUIRE the memory the other task published. */ - return smp_load_acquire(&inode->i_verity_info); + return smp_load_acquire(fsverity_info_addr(inode)); } /* enable.c */ @@ -156,11 +180,11 @@ void __fsverity_cleanup_inode(struct inode *inode); * fsverity_cleanup_inode() - free the inode's verity info, if present * @inode: an inode being evicted * - * Filesystems must call this on inode eviction to free ->i_verity_info. + * Filesystems must call this on inode eviction to free the inode's verity info. */ static inline void fsverity_cleanup_inode(struct inode *inode) { - if (inode->i_verity_info) + if (*fsverity_info_addr(inode)) __fsverity_cleanup_inode(inode); } @@ -267,12 +291,12 @@ static inline bool fsverity_verify_page(struct page *page) * fsverity_active() - do reads from the inode need to go through fs-verity? * @inode: inode to check * - * This checks whether ->i_verity_info has been set. + * This checks whether the inode's verity info has been set. * * Filesystems call this from ->readahead() to check whether the pages need to * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with - * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) + * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before the verity info.) * * Return: true if reads need to go through fs-verity, otherwise false */ @@ -287,7 +311,7 @@ static inline bool fsverity_active(const struct inode *inode) * @filp: the struct file being set up * * When opening a verity file, deny the open if it is for writing. Otherwise, - * set up the inode's ->i_verity_info if not already done. + * set up the inode's verity info if not already done. * * When combined with fscrypt, this must be called after fscrypt_file_open(). * Otherwise, we won't have the key set up to decrypt the verity metadata. -- cgit v1.2.3 From 818c659ac164e4e4639ceaedaccbdfebb1ef63b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:05 -0700 Subject: fs: remove inode::i_verity_info Now that all fsverity-capable filesystems store the pointer to fsverity_info in the filesystem-specific part of the inode structure, inode::i_verity_info is no longer needed. Update fsverity_info_addr() to no longer support the fallback to inode::i_verity_info. Finally, remove inode::i_verity_info itself, and move the forward declaration of struct fsverity_info from fs.h (which no longer needs it) to fsverity.h. The end result of the migration to the filesystem-specific pointer is memory savings on CONFIG_FS_VERITY=y kernels for all filesystems that don't support fsverity. Specifically, their in-memory inodes are now smaller by the size of a pointer: either 4 or 8 bytes. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-13-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ----- include/linux/fsverity.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1dafa18169be..12ecc6b0e6f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -73,7 +73,6 @@ struct seq_file; struct workqueue_struct; struct iov_iter; struct fscrypt_operations; -struct fsverity_info; struct fsverity_operations; struct fsnotify_mark_connector; struct fsnotify_sb_info; @@ -779,10 +778,6 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#ifdef CONFIG_FS_VERITY - struct fsverity_info *i_verity_info; -#endif - void *i_private; /* fs or device private pointer */ } __randomize_layout; diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index e0f132cb7839..844f7b8b56bb 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -26,6 +26,8 @@ /* Arbitrary limit to bound the kmalloc() size. Can be changed. */ #define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 +struct fsverity_info; + /* Verity operations for filesystems */ struct fsverity_operations { /** @@ -130,11 +132,15 @@ struct fsverity_operations { #ifdef CONFIG_FS_VERITY +/* + * Returns the address of the verity info pointer within the filesystem-specific + * part of the inode. (To save memory on filesystems that don't support + * fsverity, a field in 'struct inode' itself is no longer used.) + */ static inline struct fsverity_info ** fsverity_info_addr(const struct inode *inode) { - if (inode->i_sb->s_vop->inode_info_offs == 0) - return (struct fsverity_info **)&inode->i_verity_info; + VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0); return (void *)inode + inode->i_sb->s_vop->inode_info_offs; } -- cgit v1.2.3 From 8a3d00dde63a339d31d1fdeead24ddfd4d459c70 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:06 -0700 Subject: fsverity: check IS_VERITY() in fsverity_cleanup_inode() Since getting the address of the fsverity_info has gotten a bit more expensive, make fsverity_cleanup_inode() check for IS_VERITY() instead. This avoids adding more overhead to non-verity files. This assumes that verity info is never set when !IS_VERITY(), which is currently true, but add a VFS_WARN_ON_ONCE() that asserts that. (This of course defeats the optimization, but only when CONFIG_VFS_DEBUG=y.) Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-14-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fsverity.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 844f7b8b56bb..5bc7280425a7 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -190,8 +190,15 @@ void __fsverity_cleanup_inode(struct inode *inode); */ static inline void fsverity_cleanup_inode(struct inode *inode) { - if (*fsverity_info_addr(inode)) + /* + * Only IS_VERITY() inodes can have verity info, so start by checking + * for IS_VERITY() (which is faster than retrieving the pointer to the + * verity info). This minimizes overhead for non-verity inodes. + */ + if (IS_VERITY(inode)) __fsverity_cleanup_inode(inode); + else + VFS_WARN_ON_ONCE(*fsverity_info_addr(inode) != NULL); } /* read_metadata.c */ -- cgit v1.2.3 From 0f07b7919d679050d354d3279faa74bdc7ce17a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:12 +0200 Subject: uprobes: Rename arch_uretprobe_trampoline function We are about to add uprobe trampoline, so cleaning up the namespace. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-3-jolsa@kernel.org --- arch/x86/kernel/uprobes.c | 2 +- include/linux/uprobes.h | 2 +- kernel/events/uprobes.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 6d383839e839..77050e5a4680 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -338,7 +338,7 @@ extern u8 uretprobe_trampoline_entry[]; extern u8 uretprobe_trampoline_end[]; extern u8 uretprobe_syscall_check[]; -void *arch_uprobe_trampoline(unsigned long *psize) +void *arch_uretprobe_trampoline(unsigned long *psize) { static uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct pt_regs *regs = task_pt_regs(current); diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 516217c39094..01112f27cd21 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -224,7 +224,7 @@ extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); extern void uprobe_handle_trampoline(struct pt_regs *regs); -extern void *arch_uprobe_trampoline(unsigned long *psize); +extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1cbfe3cfe573..dd4dd156c956 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1726,7 +1726,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) return ret; } -void * __weak arch_uprobe_trampoline(unsigned long *psize) +void * __weak arch_uretprobe_trampoline(unsigned long *psize) { static uprobe_opcode_t insn = UPROBE_SWBP_INSN; @@ -1758,7 +1758,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); - insns = arch_uprobe_trampoline(&insns_size); + insns = arch_uretprobe_trampoline(&insns_size); arch_uprobe_copy_ixol(area->page, 0, insns, insns_size); if (!xol_add_vma(mm, area)) -- cgit v1.2.3 From 82afdd05a16a424409682e06a53d6afcda038d30 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:13 +0200 Subject: uprobes: Make copy_from_page global Making copy_from_page global and adding uprobe prefix. Adding the uprobe prefix to copy_to_page as well for symmetry. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-4-jolsa@kernel.org --- include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 01112f27cd21..7447e15559b8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -226,6 +226,7 @@ extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, extern void uprobe_handle_trampoline(struct pt_regs *regs); extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); +extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index dd4dd156c956..f993a3422083 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -177,7 +177,7 @@ bool __weak is_trap_insn(uprobe_opcode_t *insn) return is_swbp_insn(insn); } -static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) +void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) { void *kaddr = kmap_atomic(page); memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len); @@ -205,7 +205,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * is a trap variant; uprobes always wins over any other (gdb) * breakpoint. */ - copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); + uprobe_copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); is_swbp = is_swbp_insn(&old_opcode); if (is_swbp_insn(new_opcode)) { @@ -1051,7 +1051,7 @@ static int __copy_insn(struct address_space *mapping, struct file *filp, if (IS_ERR(page)) return PTR_ERR(page); - copy_from_page(page, offset, insn, nbytes); + uprobe_copy_from_page(page, offset, insn, nbytes); put_page(page); return 0; @@ -1397,7 +1397,7 @@ struct uprobe *uprobe_register(struct inode *inode, return ERR_PTR(-EINVAL); /* - * This ensures that copy_from_page(), copy_to_page() and + * This ensures that uprobe_copy_from_page(), copy_to_page() and * __update_ref_ctr() can't cross page boundary. */ if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE)) @@ -2393,7 +2393,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) if (result < 0) return result; - copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); + uprobe_copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); out: /* This needs to return true for any variant of the trap insn */ -- cgit v1.2.3 From 33d7b2beaf34a3c0f6406bc76f6e1b1755150ad9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:14 +0200 Subject: uprobes: Add uprobe_write function Adding uprobe_write function that does what uprobe_write_opcode did so far, but allows to pass verify callback function that checks the memory location before writing the opcode. It will be used in following changes to implement specific checking logic for instruction update. The uprobe_write_opcode now calls uprobe_write with verify_opcode as the verify callback. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Masami Hiramatsu (Google) Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-5-jolsa@kernel.org --- include/linux/uprobes.h | 5 +++++ kernel/events/uprobes.c | 14 ++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 7447e15559b8..e13382054435 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -187,6 +187,9 @@ struct uprobes_state { struct xol_area *xol_area; }; +typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, + uprobe_opcode_t *opcode); + extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -195,6 +198,8 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, + uprobe_opcode_t opcode, uprobe_write_verify_t verify); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f993a3422083..838ac40e91e6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -399,7 +399,7 @@ static bool orig_page_is_identical(struct vm_area_struct *vma, return identical; } -static int __uprobe_write_opcode(struct vm_area_struct *vma, +static int __uprobe_write(struct vm_area_struct *vma, struct folio_walk *fw, struct folio *folio, unsigned long opcode_vaddr, uprobe_opcode_t opcode) { @@ -487,6 +487,12 @@ remap: */ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, uprobe_opcode_t opcode) +{ + return uprobe_write(auprobe, vma, opcode_vaddr, opcode, verify_opcode); +} + +int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, + const unsigned long opcode_vaddr, uprobe_opcode_t opcode, uprobe_write_verify_t verify) { const unsigned long vaddr = opcode_vaddr & PAGE_MASK; struct mm_struct *mm = vma->vm_mm; @@ -509,7 +515,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, * page that we can safely modify. Use FOLL_WRITE to trigger a write * fault if required. When unregistering, we might be lucky and the * anon page is already gone. So defer write faults until really - * required. Use FOLL_SPLIT_PMD, because __uprobe_write_opcode() + * required. Use FOLL_SPLIT_PMD, because __uprobe_write() * cannot deal with PMDs yet. */ if (is_register) @@ -521,7 +527,7 @@ retry: goto out; folio = page_folio(page); - ret = verify_opcode(page, opcode_vaddr, &opcode); + ret = verify(page, opcode_vaddr, &opcode); if (ret <= 0) { folio_put(folio); goto out; @@ -560,7 +566,7 @@ retry: /* Walk the page tables again, to perform the actual update. */ if (folio_walk_start(&fw, vma, vaddr, 0)) { if (fw.page == page) - ret = __uprobe_write_opcode(vma, &fw, folio, opcode_vaddr, opcode); + ret = __uprobe_write(vma, &fw, folio, opcode_vaddr, opcode); folio_walk_end(&fw, vma); } -- cgit v1.2.3 From f8b7c528b4fb7018d12b6bb63bb52576cfc73697 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:15 +0200 Subject: uprobes: Add nbytes argument to uprobe_write Adding nbytes argument to uprobe_write and related functions as preparation for writing whole instructions in following changes. Also renaming opcode arguments to insn, which seems to fit better. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-6-jolsa@kernel.org --- include/linux/uprobes.h | 4 ++-- kernel/events/uprobes.c | 26 ++++++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e13382054435..147c4a0a1af9 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -188,7 +188,7 @@ struct uprobes_state { }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, - uprobe_opcode_t *opcode); + uprobe_opcode_t *insn, int nbytes); extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -199,7 +199,7 @@ extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t opcode, uprobe_write_verify_t verify); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 838ac40e91e6..c133fd4b492d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -191,7 +191,8 @@ static void copy_to_page(struct page *page, unsigned long vaddr, const void *src kunmap_atomic(kaddr); } -static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) +static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *insn, + int nbytes) { uprobe_opcode_t old_opcode; bool is_swbp; @@ -208,7 +209,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t uprobe_copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); is_swbp = is_swbp_insn(&old_opcode); - if (is_swbp_insn(new_opcode)) { + if (is_swbp_insn(insn)) { if (is_swbp) /* register: already installed? */ return 0; } else { @@ -401,10 +402,10 @@ static bool orig_page_is_identical(struct vm_area_struct *vma, static int __uprobe_write(struct vm_area_struct *vma, struct folio_walk *fw, struct folio *folio, - unsigned long opcode_vaddr, uprobe_opcode_t opcode) + unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes) { - const unsigned long vaddr = opcode_vaddr & PAGE_MASK; - const bool is_register = !!is_swbp_insn(&opcode); + const unsigned long vaddr = insn_vaddr & PAGE_MASK; + const bool is_register = !!is_swbp_insn(insn); bool pmd_mappable; /* For now, we'll only handle PTE-mapped folios. */ @@ -429,7 +430,7 @@ static int __uprobe_write(struct vm_area_struct *vma, */ flush_cache_page(vma, vaddr, pte_pfn(fw->pte)); fw->pte = ptep_clear_flush(vma, vaddr, fw->ptep); - copy_to_page(fw->page, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); + copy_to_page(fw->page, insn_vaddr, insn, nbytes); /* * When unregistering, we may only zap a PTE if uffd is disabled and @@ -488,13 +489,14 @@ remap: int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, uprobe_opcode_t opcode) { - return uprobe_write(auprobe, vma, opcode_vaddr, opcode, verify_opcode); + return uprobe_write(auprobe, vma, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, verify_opcode); } int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, - const unsigned long opcode_vaddr, uprobe_opcode_t opcode, uprobe_write_verify_t verify) + const unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, + uprobe_write_verify_t verify) { - const unsigned long vaddr = opcode_vaddr & PAGE_MASK; + const unsigned long vaddr = insn_vaddr & PAGE_MASK; struct mm_struct *mm = vma->vm_mm; struct uprobe *uprobe; int ret, is_register, ref_ctr_updated = 0; @@ -504,7 +506,7 @@ int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, struct folio *folio; struct page *page; - is_register = is_swbp_insn(&opcode); + is_register = is_swbp_insn(insn); uprobe = container_of(auprobe, struct uprobe, arch); if (WARN_ON_ONCE(!is_cow_mapping(vma->vm_flags))) @@ -527,7 +529,7 @@ retry: goto out; folio = page_folio(page); - ret = verify(page, opcode_vaddr, &opcode); + ret = verify(page, insn_vaddr, insn, nbytes); if (ret <= 0) { folio_put(folio); goto out; @@ -566,7 +568,7 @@ retry: /* Walk the page tables again, to perform the actual update. */ if (folio_walk_start(&fw, vma, vaddr, 0)) { if (fw.page == page) - ret = __uprobe_write(vma, &fw, folio, opcode_vaddr, opcode); + ret = __uprobe_write(vma, &fw, folio, insn_vaddr, insn, nbytes); folio_walk_end(&fw, vma); } -- cgit v1.2.3 From ec46350fe1e2338f42ee84974c36b25afe8ba53a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:16 +0200 Subject: uprobes: Add is_register argument to uprobe_write and uprobe_write_opcode The uprobe_write has special path to restore the original page when we write original instruction back. This happens when uprobe_write detects that we want to write anything else but breakpoint instruction. Moving the detection away and passing it to uprobe_write as argument, so it's possible to write different instructions (other than just breakpoint and rest). Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-7-jolsa@kernel.org --- arch/arm/probes/uprobes/core.c | 2 +- include/linux/uprobes.h | 5 +++-- kernel/events/uprobes.c | 21 +++++++++++---------- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c index 885e0c5e8c20..3d96fb41d624 100644 --- a/arch/arm/probes/uprobes/core.c +++ b/arch/arm/probes/uprobes/core.c @@ -30,7 +30,7 @@ int set_swbp(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr) { return uprobe_write_opcode(auprobe, vma, vaddr, - __opcode_to_mem_arm(auprobe->bpinsn)); + __opcode_to_mem_arm(auprobe->bpinsn), true); } bool arch_uprobe_ignore(struct arch_uprobe *auprobe, struct pt_regs *regs) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 147c4a0a1af9..518b26756469 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -197,9 +197,10 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn); extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); -extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, + bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c133fd4b492d..955e5ed3e383 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -402,10 +402,10 @@ static bool orig_page_is_identical(struct vm_area_struct *vma, static int __uprobe_write(struct vm_area_struct *vma, struct folio_walk *fw, struct folio *folio, - unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes) + unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, + bool is_register) { const unsigned long vaddr = insn_vaddr & PAGE_MASK; - const bool is_register = !!is_swbp_insn(insn); bool pmd_mappable; /* For now, we'll only handle PTE-mapped folios. */ @@ -487,26 +487,27 @@ remap: * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, - const unsigned long opcode_vaddr, uprobe_opcode_t opcode) + const unsigned long opcode_vaddr, uprobe_opcode_t opcode, + bool is_register) { - return uprobe_write(auprobe, vma, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, verify_opcode); + return uprobe_write(auprobe, vma, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, + verify_opcode, is_register); } int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, - uprobe_write_verify_t verify) + uprobe_write_verify_t verify, bool is_register) { const unsigned long vaddr = insn_vaddr & PAGE_MASK; struct mm_struct *mm = vma->vm_mm; struct uprobe *uprobe; - int ret, is_register, ref_ctr_updated = 0; + int ret, ref_ctr_updated = 0; unsigned int gup_flags = FOLL_FORCE; struct mmu_notifier_range range; struct folio_walk fw; struct folio *folio; struct page *page; - is_register = is_swbp_insn(insn); uprobe = container_of(auprobe, struct uprobe, arch); if (WARN_ON_ONCE(!is_cow_mapping(vma->vm_flags))) @@ -568,7 +569,7 @@ retry: /* Walk the page tables again, to perform the actual update. */ if (folio_walk_start(&fw, vma, vaddr, 0)) { if (fw.page == page) - ret = __uprobe_write(vma, &fw, folio, insn_vaddr, insn, nbytes); + ret = __uprobe_write(vma, &fw, folio, insn_vaddr, insn, nbytes, is_register); folio_walk_end(&fw, vma); } @@ -610,7 +611,7 @@ out: int __weak set_swbp(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr) { - return uprobe_write_opcode(auprobe, vma, vaddr, UPROBE_SWBP_INSN); + return uprobe_write_opcode(auprobe, vma, vaddr, UPROBE_SWBP_INSN, true); } /** @@ -626,7 +627,7 @@ int __weak set_orig_insn(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr) { return uprobe_write_opcode(auprobe, vma, vaddr, - *(uprobe_opcode_t *)&auprobe->insn); + *(uprobe_opcode_t *)&auprobe->insn, false); } /* uprobe should have guaranteed positive refcount */ -- cgit v1.2.3 From 18a111256a0b4fedfe47101f084441a84d7e357a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:17 +0200 Subject: uprobes: Add do_ref_ctr argument to uprobe_write function Making update_ref_ctr call in uprobe_write conditional based on do_ref_ctr argument. This way we can use uprobe_write for instruction update without doing ref_ctr_offset update. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-8-jolsa@kernel.org --- include/linux/uprobes.h | 2 +- kernel/events/uprobes.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 518b26756469..5080619560d4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -200,7 +200,7 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 955e5ed3e383..da2b3d0deab6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -491,12 +491,12 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, bool is_register) { return uprobe_write(auprobe, vma, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, - verify_opcode, is_register); + verify_opcode, is_register, true /* do_update_ref_ctr */); } int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, - uprobe_write_verify_t verify, bool is_register) + uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr) { const unsigned long vaddr = insn_vaddr & PAGE_MASK; struct mm_struct *mm = vma->vm_mm; @@ -537,7 +537,7 @@ retry: } /* We are going to replace instruction, update ref_ctr. */ - if (!ref_ctr_updated && uprobe->ref_ctr_offset) { + if (do_update_ref_ctr && !ref_ctr_updated && uprobe->ref_ctr_offset) { ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); if (ret) { folio_put(folio); @@ -589,7 +589,7 @@ retry: out: /* Revert back reference counter if instruction update failed. */ - if (ret < 0 && ref_ctr_updated) + if (do_update_ref_ctr && ret < 0 && ref_ctr_updated) update_ref_ctr(uprobe, mm, is_register ? -1 : 1); /* try collapse pmd for compound page */ -- cgit v1.2.3 From 91440ff4cafad4c86322a612e523f7f021a493e7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:18 +0200 Subject: uprobes/x86: Add mapping for optimized uprobe trampolines Adding support to add special mapping for user space trampoline with following functions: uprobe_trampoline_get - find or add uprobe_trampoline uprobe_trampoline_put - remove or destroy uprobe_trampoline The user space trampoline is exported as arch specific user space special mapping through tramp_mapping, which is initialized in following changes with new uprobe syscall. The uprobe trampoline needs to be callable/reachable from the probed address, so while searching for available address we use is_reachable_by_call function to decide if the uprobe trampoline is callable from the probe address. All uprobe_trampoline objects are stored in uprobes_state object and are cleaned up when the process mm_struct goes down. Adding new arch hooks for that, because this change is x86_64 specific. Locking is provided by callers in following changes. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-9-jolsa@kernel.org --- arch/x86/kernel/uprobes.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/uprobes.h | 6 ++ kernel/events/uprobes.c | 10 ++++ kernel/fork.c | 1 + 4 files changed, 161 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 77050e5a4680..6c4dcbdd0c3c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -608,6 +608,150 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) *sr = utask->autask.saved_scratch_register; } } + +static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) +{ + return -EPERM; +} + +static struct page *tramp_mapping_pages[2] __ro_after_init; + +static struct vm_special_mapping tramp_mapping = { + .name = "[uprobes-trampoline]", + .mremap = tramp_mremap, + .pages = tramp_mapping_pages, +}; + +struct uprobe_trampoline { + struct hlist_node node; + unsigned long vaddr; +}; + +static bool is_reachable_by_call(unsigned long vtramp, unsigned long vaddr) +{ + long delta = (long)(vaddr + 5 - vtramp); + + return delta >= INT_MIN && delta <= INT_MAX; +} + +static unsigned long find_nearest_trampoline(unsigned long vaddr) +{ + struct vm_unmapped_area_info info = { + .length = PAGE_SIZE, + .align_mask = ~PAGE_MASK, + }; + unsigned long low_limit, high_limit; + unsigned long low_tramp, high_tramp; + unsigned long call_end = vaddr + 5; + + if (check_add_overflow(call_end, INT_MIN, &low_limit)) + low_limit = PAGE_SIZE; + + high_limit = call_end + INT_MAX; + + /* Search up from the caller address. */ + info.low_limit = call_end; + info.high_limit = min(high_limit, TASK_SIZE); + high_tramp = vm_unmapped_area(&info); + + /* Search down from the caller address. */ + info.low_limit = max(low_limit, PAGE_SIZE); + info.high_limit = call_end; + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + low_tramp = vm_unmapped_area(&info); + + if (IS_ERR_VALUE(high_tramp) && IS_ERR_VALUE(low_tramp)) + return -ENOMEM; + if (IS_ERR_VALUE(high_tramp)) + return low_tramp; + if (IS_ERR_VALUE(low_tramp)) + return high_tramp; + + /* Return address that's closest to the caller address. */ + if (call_end - low_tramp < high_tramp - call_end) + return low_tramp; + return high_tramp; +} + +static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr) +{ + struct pt_regs *regs = task_pt_regs(current); + struct mm_struct *mm = current->mm; + struct uprobe_trampoline *tramp; + struct vm_area_struct *vma; + + if (!user_64bit_mode(regs)) + return NULL; + + vaddr = find_nearest_trampoline(vaddr); + if (IS_ERR_VALUE(vaddr)) + return NULL; + + tramp = kzalloc(sizeof(*tramp), GFP_KERNEL); + if (unlikely(!tramp)) + return NULL; + + tramp->vaddr = vaddr; + vma = _install_special_mapping(mm, tramp->vaddr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO, + &tramp_mapping); + if (IS_ERR(vma)) { + kfree(tramp); + return NULL; + } + return tramp; +} + +__maybe_unused +static struct uprobe_trampoline *get_uprobe_trampoline(unsigned long vaddr, bool *new) +{ + struct uprobes_state *state = ¤t->mm->uprobes_state; + struct uprobe_trampoline *tramp = NULL; + + if (vaddr > TASK_SIZE || vaddr < PAGE_SIZE) + return NULL; + + hlist_for_each_entry(tramp, &state->head_tramps, node) { + if (is_reachable_by_call(tramp->vaddr, vaddr)) { + *new = false; + return tramp; + } + } + + tramp = create_uprobe_trampoline(vaddr); + if (!tramp) + return NULL; + + *new = true; + hlist_add_head(&tramp->node, &state->head_tramps); + return tramp; +} + +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp) +{ + /* + * We do not unmap and release uprobe trampoline page itself, + * because there's no easy way to make sure none of the threads + * is still inside the trampoline. + */ + hlist_del(&tramp->node); + kfree(tramp); +} + +void arch_uprobe_init_state(struct mm_struct *mm) +{ + INIT_HLIST_HEAD(&mm->uprobes_state.head_tramps); +} + +void arch_uprobe_clear_state(struct mm_struct *mm) +{ + struct uprobes_state *state = &mm->uprobes_state; + struct uprobe_trampoline *tramp; + struct hlist_node *n; + + hlist_for_each_entry_safe(tramp, n, &state->head_tramps, node) + destroy_uprobe_trampoline(tramp); +} #else /* 32-bit: */ /* * No RIP-relative addressing on 32-bit diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 5080619560d4..b40d33aae016 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -17,6 +17,7 @@ #include #include #include +#include struct uprobe; struct vm_area_struct; @@ -185,6 +186,9 @@ struct xol_area; struct uprobes_state { struct xol_area *xol_area; +#ifdef CONFIG_X86_64 + struct hlist_head head_tramps; +#endif }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, @@ -233,6 +237,8 @@ extern void uprobe_handle_trampoline(struct pt_regs *regs); extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); +extern void arch_uprobe_clear_state(struct mm_struct *mm); +extern void arch_uprobe_init_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index da2b3d0deab6..2cd7a4c6f303 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1801,6 +1801,14 @@ static struct xol_area *get_xol_area(void) return area; } +void __weak arch_uprobe_clear_state(struct mm_struct *mm) +{ +} + +void __weak arch_uprobe_init_state(struct mm_struct *mm) +{ +} + /* * uprobe_clear_state - Free the area allocated for slots. */ @@ -1812,6 +1820,8 @@ void uprobe_clear_state(struct mm_struct *mm) delayed_uprobe_remove(NULL, mm); mutex_unlock(&delayed_uprobe_lock); + arch_uprobe_clear_state(mm); + if (!area) return; diff --git a/kernel/fork.c b/kernel/fork.c index af673856499d..d827cc6c5362 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1015,6 +1015,7 @@ static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES mm->uprobes_state.xol_area = NULL; + arch_uprobe_init_state(mm); #endif } -- cgit v1.2.3 From 56101b69c9190667f473b9f93f8b6d8209aaa816 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:19 +0200 Subject: uprobes/x86: Add uprobe syscall to speed up uprobe Adding new uprobe syscall that calls uprobe handlers for given 'breakpoint' address. The idea is that the 'breakpoint' address calls the user space trampoline which executes the uprobe syscall. The syscall handler reads the return address of the initial call to retrieve the original 'breakpoint' address. With this address we find the related uprobe object and call its consumers. Adding the arch_uprobe_trampoline_mapping function that provides uprobe trampoline mapping. This mapping is backed with one global page initialized at __init time and shared by the all the mapping instances. We do not allow to execute uprobe syscall if the caller is not from uprobe trampoline mapping. The uprobe syscall ensures the consumer (bpf program) sees registers values in the state before the trampoline was called. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-10-jolsa@kernel.org --- arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/x86/kernel/uprobes.c | 139 +++++++++++++++++++++++++++++++++ include/linux/syscalls.h | 2 + include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 17 ++++ kernel/sys_ni.c | 1 + 6 files changed, 161 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92cf0fe2291e..ced2a1deecd7 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -345,6 +345,7 @@ 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq 335 common uretprobe sys_uretprobe +336 common uprobe sys_uprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 6c4dcbdd0c3c..d18e1ae59901 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -752,6 +752,145 @@ void arch_uprobe_clear_state(struct mm_struct *mm) hlist_for_each_entry_safe(tramp, n, &state->head_tramps, node) destroy_uprobe_trampoline(tramp); } + +static bool __in_uprobe_trampoline(unsigned long ip) +{ + struct vm_area_struct *vma = vma_lookup(current->mm, ip); + + return vma && vma_is_special_mapping(vma, &tramp_mapping); +} + +static bool in_uprobe_trampoline(unsigned long ip) +{ + struct mm_struct *mm = current->mm; + bool found, retry = true; + unsigned int seq; + + rcu_read_lock(); + if (mmap_lock_speculate_try_begin(mm, &seq)) { + found = __in_uprobe_trampoline(ip); + retry = mmap_lock_speculate_retry(mm, seq); + } + rcu_read_unlock(); + + if (retry) { + mmap_read_lock(mm); + found = __in_uprobe_trampoline(ip); + mmap_read_unlock(mm); + } + return found; +} + +/* + * See uprobe syscall trampoline; the call to the trampoline will push + * the return address on the stack, the trampoline itself then pushes + * cx, r11 and ax. + */ +struct uprobe_syscall_args { + unsigned long ax; + unsigned long r11; + unsigned long cx; + unsigned long retaddr; +}; + +SYSCALL_DEFINE0(uprobe) +{ + struct pt_regs *regs = task_pt_regs(current); + struct uprobe_syscall_args args; + unsigned long ip, sp; + int err; + + /* Allow execution only from uprobe trampolines. */ + if (!in_uprobe_trampoline(regs->ip)) + goto sigill; + + err = copy_from_user(&args, (void __user *)regs->sp, sizeof(args)); + if (err) + goto sigill; + + ip = regs->ip; + + /* + * expose the "right" values of ax/r11/cx/ip/sp to uprobe_consumer/s, plus: + * - adjust ip to the probe address, call saved next instruction address + * - adjust sp to the probe's stack frame (check trampoline code) + */ + regs->ax = args.ax; + regs->r11 = args.r11; + regs->cx = args.cx; + regs->ip = args.retaddr - 5; + regs->sp += sizeof(args); + regs->orig_ax = -1; + + sp = regs->sp; + + handle_syscall_uprobe(regs, regs->ip); + + /* + * Some of the uprobe consumers has changed sp, we can do nothing, + * just return via iret. + */ + if (regs->sp != sp) { + /* skip the trampoline call */ + if (args.retaddr - 5 == regs->ip) + regs->ip += 5; + return regs->ax; + } + + regs->sp -= sizeof(args); + + /* for the case uprobe_consumer has changed ax/r11/cx */ + args.ax = regs->ax; + args.r11 = regs->r11; + args.cx = regs->cx; + + /* keep return address unless we are instructed otherwise */ + if (args.retaddr - 5 != regs->ip) + args.retaddr = regs->ip; + + regs->ip = ip; + + err = copy_to_user((void __user *)regs->sp, &args, sizeof(args)); + if (err) + goto sigill; + + /* ensure sysret, see do_syscall_64() */ + regs->r11 = regs->flags; + regs->cx = regs->ip; + return 0; + +sigill: + force_sig(SIGILL); + return -1; +} + +asm ( + ".pushsection .rodata\n" + ".balign " __stringify(PAGE_SIZE) "\n" + "uprobe_trampoline_entry:\n" + "push %rcx\n" + "push %r11\n" + "push %rax\n" + "movq $" __stringify(__NR_uprobe) ", %rax\n" + "syscall\n" + "pop %rax\n" + "pop %r11\n" + "pop %rcx\n" + "ret\n" + ".balign " __stringify(PAGE_SIZE) "\n" + ".popsection\n" +); + +extern u8 uprobe_trampoline_entry[]; + +static int __init arch_uprobes_init(void) +{ + tramp_mapping_pages[0] = virt_to_page(uprobe_trampoline_entry); + return 0; +} + +late_initcall(arch_uprobes_init); + #else /* 32-bit: */ /* * No RIP-relative addressing on 32-bit diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 77f45e5d4413..66c06fcdfe19 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1005,6 +1005,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); asmlinkage long sys_uretprobe(void); +asmlinkage long sys_uprobe(void); + /* pciconfig: alpha, arm, arm64, ia64, sparc */ asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b40d33aae016..b6b077cc7d0f 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -239,6 +239,7 @@ extern unsigned long uprobe_get_trampoline_vaddr(void); extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); +extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2cd7a4c6f303..eb07e602b6c9 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2771,6 +2771,23 @@ out: rcu_read_unlock_trace(); } +void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr) +{ + struct uprobe *uprobe; + int is_swbp; + + guard(rcu_tasks_trace)(); + + uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp); + if (!uprobe) + return; + if (!get_utask()) + return; + if (arch_uprobe_ignore(&uprobe->arch, regs)) + return; + handler_chain(uprobe, regs); +} + /* * Perform required fix-ups and disable singlestep. * Allow pending signals to take effect. diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index c00a86931f8c..bf5d05c635ff 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -392,3 +392,4 @@ COND_SYSCALL(setuid16); COND_SYSCALL(rseq); COND_SYSCALL(uretprobe); +COND_SYSCALL(uprobe); -- cgit v1.2.3 From ba2bfc97b4629b10bd8d02b36e04f3932a04cac4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:20 +0200 Subject: uprobes/x86: Add support to optimize uprobes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Putting together all the previously added pieces to support optimized uprobes on top of 5-byte nop instruction. The current uprobe execution goes through following: - installs breakpoint instruction over original instruction - exception handler hit and calls related uprobe consumers - and either simulates original instruction or does out of line single step execution of it - returns to user space The optimized uprobe path does following: - checks the original instruction is 5-byte nop (plus other checks) - adds (or uses existing) user space trampoline with uprobe syscall - overwrites original instruction (5-byte nop) with call to user space trampoline - the user space trampoline executes uprobe syscall that calls related uprobe consumers - trampoline returns back to next instruction This approach won't speed up all uprobes as it's limited to using nop5 as original instruction, but we plan to use nop5 as USDT probe instruction (which currently uses single byte nop) and speed up the USDT probes. The arch_uprobe_optimize triggers the uprobe optimization and is called after first uprobe hit. I originally had it called on uprobe installation but then it clashed with elf loader, because the user space trampoline was added in a place where loader might need to put elf segments, so I decided to do it after first uprobe hit when loading is done. The uprobe is un-optimized in arch specific set_orig_insn call. The instruction overwrite is x86 arch specific and needs to go through 3 updates: (on top of nop5 instruction) - write int3 into 1st byte - write last 4 bytes of the call instruction - update the call instruction opcode And cleanup goes though similar reverse stages: - overwrite call opcode with breakpoint (int3) - write last 4 bytes of the nop5 instruction - write the nop5 first instruction byte We do not unmap and release uprobe trampoline when it's no longer needed, because there's no easy way to make sure none of the threads is still inside the trampoline. But we do not waste memory, because there's just single page for all the uprobe trampoline mappings. We do waste frame on page mapping for every 4GB by keeping the uprobe trampoline page mapped, but that seems ok. We take the benefit from the fact that set_swbp and set_orig_insn are called under mmap_write_lock(mm), so we can use the current instruction as the state the uprobe is in - nop5/breakpoint/call trampoline - and decide the needed action (optimize/un-optimize) based on that. Attaching the speed up from benchs/run_bench_uprobes.sh script: current: usermode-count : 152.604 ± 0.044M/s syscall-count : 13.359 ± 0.042M/s --> uprobe-nop : 3.229 ± 0.002M/s uprobe-push : 3.086 ± 0.004M/s uprobe-ret : 1.114 ± 0.004M/s uprobe-nop5 : 1.121 ± 0.005M/s uretprobe-nop : 2.145 ± 0.002M/s uretprobe-push : 2.070 ± 0.001M/s uretprobe-ret : 0.931 ± 0.001M/s uretprobe-nop5 : 0.957 ± 0.001M/s after the change: usermode-count : 152.448 ± 0.244M/s syscall-count : 14.321 ± 0.059M/s uprobe-nop : 3.148 ± 0.007M/s uprobe-push : 2.976 ± 0.004M/s uprobe-ret : 1.068 ± 0.003M/s --> uprobe-nop5 : 7.038 ± 0.007M/s uretprobe-nop : 2.109 ± 0.004M/s uretprobe-push : 2.035 ± 0.001M/s uretprobe-ret : 0.908 ± 0.001M/s uretprobe-nop5 : 3.377 ± 0.009M/s I see bit more speed up on Intel (above) compared to AMD. The big nop5 speed up is partly due to emulating nop5 and partly due to optimization. The key speed up we do this for is the USDT switch from nop to nop5: uprobe-nop : 3.148 ± 0.007M/s uprobe-nop5 : 7.038 ± 0.007M/s Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-11-jolsa@kernel.org --- arch/x86/include/asm/uprobes.h | 7 + arch/x86/kernel/uprobes.c | 283 ++++++++++++++++++++++++++++++++++++++++- include/linux/uprobes.h | 6 +- kernel/events/uprobes.c | 16 ++- 4 files changed, 305 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 678fb546f0a7..1ee2e5115955 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -20,6 +20,11 @@ typedef u8 uprobe_opcode_t; #define UPROBE_SWBP_INSN 0xcc #define UPROBE_SWBP_INSN_SIZE 1 +enum { + ARCH_UPROBE_FLAG_CAN_OPTIMIZE = 0, + ARCH_UPROBE_FLAG_OPTIMIZE_FAIL = 1, +}; + struct uprobe_xol_ops; struct arch_uprobe { @@ -45,6 +50,8 @@ struct arch_uprobe { u8 ilen; } push; }; + + unsigned long flags; }; struct arch_uprobe_task { diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index d18e1ae59901..209ce74ab93f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -18,6 +18,7 @@ #include #include #include +#include /* Post-execution fixups. */ @@ -702,7 +703,6 @@ static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr) return tramp; } -__maybe_unused static struct uprobe_trampoline *get_uprobe_trampoline(unsigned long vaddr, bool *new) { struct uprobes_state *state = ¤t->mm->uprobes_state; @@ -891,6 +891,280 @@ static int __init arch_uprobes_init(void) late_initcall(arch_uprobes_init); +enum { + EXPECT_SWBP, + EXPECT_CALL, +}; + +struct write_opcode_ctx { + unsigned long base; + int expect; +}; + +static int is_call_insn(uprobe_opcode_t *insn) +{ + return *insn == CALL_INSN_OPCODE; +} + +/* + * Verification callback used by int3_update uprobe_write calls to make sure + * the underlying instruction is as expected - either int3 or call. + */ +static int verify_insn(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode, + int nbytes, void *data) +{ + struct write_opcode_ctx *ctx = data; + uprobe_opcode_t old_opcode[5]; + + uprobe_copy_from_page(page, ctx->base, (uprobe_opcode_t *) &old_opcode, 5); + + switch (ctx->expect) { + case EXPECT_SWBP: + if (is_swbp_insn(&old_opcode[0])) + return 1; + break; + case EXPECT_CALL: + if (is_call_insn(&old_opcode[0])) + return 1; + break; + } + + return -1; +} + +/* + * Modify multi-byte instructions by using INT3 breakpoints on SMP. + * We completely avoid using stop_machine() here, and achieve the + * synchronization using INT3 breakpoints and SMP cross-calls. + * (borrowed comment from smp_text_poke_batch_finish) + * + * The way it is done: + * - Add an INT3 trap to the address that will be patched + * - SMP sync all CPUs + * - Update all but the first byte of the patched range + * - SMP sync all CPUs + * - Replace the first byte (INT3) by the first byte of the replacing opcode + * - SMP sync all CPUs + */ +static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct *vma, + unsigned long vaddr, char *insn, bool optimize) +{ + uprobe_opcode_t int3 = UPROBE_SWBP_INSN; + struct write_opcode_ctx ctx = { + .base = vaddr, + }; + int err; + + /* + * Write int3 trap. + * + * The swbp_optimize path comes with breakpoint already installed, + * so we can skip this step for optimize == true. + */ + if (!optimize) { + ctx.expect = EXPECT_CALL; + err = uprobe_write(auprobe, vma, vaddr, &int3, 1, verify_insn, + true /* is_register */, false /* do_update_ref_ctr */, + &ctx); + if (err) + return err; + } + + smp_text_poke_sync_each_cpu(); + + /* Write all but the first byte of the patched range. */ + ctx.expect = EXPECT_SWBP; + err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, 4, verify_insn, + true /* is_register */, false /* do_update_ref_ctr */, + &ctx); + if (err) + return err; + + smp_text_poke_sync_each_cpu(); + + /* + * Write first byte. + * + * The swbp_unoptimize needs to finish uprobe removal together + * with ref_ctr update, using uprobe_write with proper flags. + */ + err = uprobe_write(auprobe, vma, vaddr, insn, 1, verify_insn, + optimize /* is_register */, !optimize /* do_update_ref_ctr */, + &ctx); + if (err) + return err; + + smp_text_poke_sync_each_cpu(); + return 0; +} + +static int swbp_optimize(struct arch_uprobe *auprobe, struct vm_area_struct *vma, + unsigned long vaddr, unsigned long tramp) +{ + u8 call[5]; + + __text_gen_insn(call, CALL_INSN_OPCODE, (const void *) vaddr, + (const void *) tramp, CALL_INSN_SIZE); + return int3_update(auprobe, vma, vaddr, call, true /* optimize */); +} + +static int swbp_unoptimize(struct arch_uprobe *auprobe, struct vm_area_struct *vma, + unsigned long vaddr) +{ + return int3_update(auprobe, vma, vaddr, auprobe->insn, false /* optimize */); +} + +static int copy_from_vaddr(struct mm_struct *mm, unsigned long vaddr, void *dst, int len) +{ + unsigned int gup_flags = FOLL_FORCE|FOLL_SPLIT_PMD; + struct vm_area_struct *vma; + struct page *page; + + page = get_user_page_vma_remote(mm, vaddr, gup_flags, &vma); + if (IS_ERR(page)) + return PTR_ERR(page); + uprobe_copy_from_page(page, vaddr, dst, len); + put_page(page); + return 0; +} + +static bool __is_optimized(uprobe_opcode_t *insn, unsigned long vaddr) +{ + struct __packed __arch_relative_insn { + u8 op; + s32 raddr; + } *call = (struct __arch_relative_insn *) insn; + + if (!is_call_insn(insn)) + return false; + return __in_uprobe_trampoline(vaddr + 5 + call->raddr); +} + +static int is_optimized(struct mm_struct *mm, unsigned long vaddr, bool *optimized) +{ + uprobe_opcode_t insn[5]; + int err; + + err = copy_from_vaddr(mm, vaddr, &insn, 5); + if (err) + return err; + *optimized = __is_optimized((uprobe_opcode_t *)&insn, vaddr); + return 0; +} + +static bool should_optimize(struct arch_uprobe *auprobe) +{ + return !test_bit(ARCH_UPROBE_FLAG_OPTIMIZE_FAIL, &auprobe->flags) && + test_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags); +} + +int set_swbp(struct arch_uprobe *auprobe, struct vm_area_struct *vma, + unsigned long vaddr) +{ + if (should_optimize(auprobe)) { + bool optimized = false; + int err; + + /* + * We could race with another thread that already optimized the probe, + * so let's not overwrite it with int3 again in this case. + */ + err = is_optimized(vma->vm_mm, vaddr, &optimized); + if (err) + return err; + if (optimized) + return 0; + } + return uprobe_write_opcode(auprobe, vma, vaddr, UPROBE_SWBP_INSN, + true /* is_register */); +} + +int set_orig_insn(struct arch_uprobe *auprobe, struct vm_area_struct *vma, + unsigned long vaddr) +{ + if (test_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags)) { + struct mm_struct *mm = vma->vm_mm; + bool optimized = false; + int err; + + err = is_optimized(mm, vaddr, &optimized); + if (err) + return err; + if (optimized) { + err = swbp_unoptimize(auprobe, vma, vaddr); + WARN_ON_ONCE(err); + return err; + } + } + return uprobe_write_opcode(auprobe, vma, vaddr, *(uprobe_opcode_t *)&auprobe->insn, + false /* is_register */); +} + +static int __arch_uprobe_optimize(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long vaddr) +{ + struct uprobe_trampoline *tramp; + struct vm_area_struct *vma; + bool new = false; + int err = 0; + + vma = find_vma(mm, vaddr); + if (!vma) + return -EINVAL; + tramp = get_uprobe_trampoline(vaddr, &new); + if (!tramp) + return -EINVAL; + err = swbp_optimize(auprobe, vma, vaddr, tramp->vaddr); + if (WARN_ON_ONCE(err) && new) + destroy_uprobe_trampoline(tramp); + return err; +} + +void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + uprobe_opcode_t insn[5]; + + /* + * Do not optimize if shadow stack is enabled, the return address hijack + * code in arch_uretprobe_hijack_return_addr updates wrong frame when + * the entry uprobe is optimized and the shadow stack crashes the app. + */ + if (shstk_is_enabled()) + return; + + if (!should_optimize(auprobe)) + return; + + mmap_write_lock(mm); + + /* + * Check if some other thread already optimized the uprobe for us, + * if it's the case just go away silently. + */ + if (copy_from_vaddr(mm, vaddr, &insn, 5)) + goto unlock; + if (!is_swbp_insn((uprobe_opcode_t*) &insn)) + goto unlock; + + /* + * If we fail to optimize the uprobe we set the fail bit so the + * above should_optimize will fail from now on. + */ + if (__arch_uprobe_optimize(auprobe, mm, vaddr)) + set_bit(ARCH_UPROBE_FLAG_OPTIMIZE_FAIL, &auprobe->flags); + +unlock: + mmap_write_unlock(mm); +} + +static bool can_optimize(struct arch_uprobe *auprobe, unsigned long vaddr) +{ + if (memcmp(&auprobe->insn, x86_nops[5], 5)) + return false; + /* We can't do cross page atomic writes yet. */ + return PAGE_SIZE - (vaddr & ~PAGE_MASK) >= 5; +} #else /* 32-bit: */ /* * No RIP-relative addressing on 32-bit @@ -904,6 +1178,10 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } +static bool can_optimize(struct arch_uprobe *auprobe, unsigned long vaddr) +{ + return false; +} #endif /* CONFIG_X86_64 */ struct uprobe_xol_ops { @@ -1270,6 +1548,9 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, if (ret) return ret; + if (can_optimize(auprobe, addr)) + set_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags); + ret = branch_setup_xol_ops(auprobe, &insn); if (ret != -ENOSYS) return ret; diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b6b077cc7d0f..08ef78439d0d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -192,7 +192,7 @@ struct uprobes_state { }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, - uprobe_opcode_t *insn, int nbytes); + uprobe_opcode_t *insn, int nbytes, void *data); extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -204,7 +204,8 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr, + void *data); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); @@ -240,6 +241,7 @@ extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void * extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); +extern void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index eb07e602b6c9..4a194d7c838b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -192,7 +192,7 @@ static void copy_to_page(struct page *page, unsigned long vaddr, const void *src } static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *insn, - int nbytes) + int nbytes, void *data) { uprobe_opcode_t old_opcode; bool is_swbp; @@ -491,12 +491,13 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, bool is_register) { return uprobe_write(auprobe, vma, opcode_vaddr, &opcode, UPROBE_SWBP_INSN_SIZE, - verify_opcode, is_register, true /* do_update_ref_ctr */); + verify_opcode, is_register, true /* do_update_ref_ctr */, NULL); } int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long insn_vaddr, uprobe_opcode_t *insn, int nbytes, - uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr) + uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr, + void *data) { const unsigned long vaddr = insn_vaddr & PAGE_MASK; struct mm_struct *mm = vma->vm_mm; @@ -530,7 +531,7 @@ retry: goto out; folio = page_folio(page); - ret = verify(page, insn_vaddr, insn, nbytes); + ret = verify(page, insn_vaddr, insn, nbytes, data); if (ret <= 0) { folio_put(folio); goto out; @@ -2696,6 +2697,10 @@ bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c return true; } +void __weak arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr) +{ +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. @@ -2760,6 +2765,9 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); + /* Try to optimize after first hit. */ + arch_uprobe_optimize(&uprobe->arch, bp_vaddr); + if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; -- cgit v1.2.3 From 3202d6ed9368fc1e842fda73727553ae614633f8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 21 Aug 2025 15:07:50 +0200 Subject: mmc: core: Add infrastructure for undervoltage handling Implement the core infrastructure to allow MMC bus types to handle REGULATOR_EVENT_UNDER_VOLTAGE events from power regulators. This is primarily aimed at allowing devices like eMMC to perform an emergency shutdown to prevent data corruption when a power failure is imminent. This patch introduces: - A new 'handle_undervoltage' function pointer to 'struct mmc_bus_ops'. Bus drivers (e.g., for eMMC) can implement this to define their emergency procedures. - A workqueue ('uv_work') in 'struct mmc_supply' to handle the event asynchronously in a high-priority context. - A new function 'mmc_handle_undervoltage()' which is called from the workqueue. It stops the host queue to prevent races with card removal, checks for the bus op, and invokes the handler. - Functions to register and unregister the regulator notifier, intended to be called by bus drivers like 'mmc_attach_mmc' when a compatible card is detected. The notifier is only registered for the main vmmc supply, as undervoltage handling for vqmmc or vqmmc2 is not required at this time. Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20250821130751.2089587-2-o.rempel@pengutronix.de Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 12 +++++++ drivers/mmc/core/core.c | 23 +++++++++++++ drivers/mmc/core/core.h | 5 +++ drivers/mmc/core/host.c | 2 ++ drivers/mmc/core/regulator.c | 77 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 11 +++++++ 6 files changed, 130 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 1cf64e0952fb..ec4f3462bf80 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -19,6 +19,7 @@ #include #include +#include #include "core.h" #include "card.h" @@ -383,6 +384,14 @@ int mmc_add_card(struct mmc_card *card) mmc_card_set_present(card); + /* + * Register for undervoltage notification if the card supports + * power-off notification, enabling emergency shutdowns. + */ + if (mmc_card_mmc(card) && + card->ext_csd.power_off_notification == EXT_CSD_POWER_ON) + mmc_regulator_register_undervoltage_notifier(card->host); + return 0; } @@ -394,6 +403,9 @@ void mmc_remove_card(struct mmc_card *card) { struct mmc_host *host = card->host; + if (mmc_card_present(card)) + mmc_regulator_unregister_undervoltage_notifier(host); + mmc_remove_card_debugfs(card); if (mmc_card_present(card)) { diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 88fd231fee1d..860378bea557 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1398,6 +1398,29 @@ void mmc_power_cycle(struct mmc_host *host, u32 ocr) mmc_power_up(host, ocr); } +/** + * mmc_handle_undervoltage - Handle an undervoltage event on the MMC bus + * @host: The MMC host that detected the undervoltage condition + * + * This function is called when an undervoltage event is detected on one of + * the MMC regulators. + * + * Returns: 0 on success or a negative error code on failure. + */ +int mmc_handle_undervoltage(struct mmc_host *host) +{ + /* Stop the host to prevent races with card removal */ + __mmc_stop_host(host); + + if (!host->bus_ops || !host->bus_ops->handle_undervoltage) + return 0; + + dev_warn(mmc_dev(host), "%s: Undervoltage detected, initiating emergency stop\n", + mmc_hostname(host)); + + return host->bus_ops->handle_undervoltage(host); +} + /* * Assign a mmc bus handler to a host. Only one bus handler may control a * host at any given time. diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 73f5d3d8c77d..a028b48be164 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -31,6 +31,7 @@ struct mmc_bus_ops { int (*sw_reset)(struct mmc_host *); bool (*cache_enabled)(struct mmc_host *); int (*flush_cache)(struct mmc_host *); + int (*handle_undervoltage)(struct mmc_host *host); }; void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops); @@ -59,6 +60,10 @@ void mmc_power_off(struct mmc_host *host); void mmc_power_cycle(struct mmc_host *host, u32 ocr); void mmc_set_initial_state(struct mmc_host *host); u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max); +int mmc_handle_undervoltage(struct mmc_host *host); +void mmc_regulator_register_undervoltage_notifier(struct mmc_host *host); +void mmc_regulator_unregister_undervoltage_notifier(struct mmc_host *host); +void mmc_undervoltage_workfn(struct work_struct *work); static inline void mmc_delay(unsigned int ms) { diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index f14671ea5716..5f0ec23aeff5 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -564,6 +564,8 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) INIT_WORK(&host->sdio_irq_work, sdio_irq_work); timer_setup(&host->retune_timer, mmc_retune_timer, 0); + INIT_WORK(&host->supply.uv_work, mmc_undervoltage_workfn); + /* * By default, hosts do not support SGIO or large requests. * They have to set these according to their abilities. diff --git a/drivers/mmc/core/regulator.c b/drivers/mmc/core/regulator.c index 3dae2e9b7978..a85179f1a4de 100644 --- a/drivers/mmc/core/regulator.c +++ b/drivers/mmc/core/regulator.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -262,6 +263,82 @@ static inline int mmc_regulator_get_ocrmask(struct regulator *supply) #endif /* CONFIG_REGULATOR */ +/* To be called from a high-priority workqueue */ +void mmc_undervoltage_workfn(struct work_struct *work) +{ + struct mmc_supply *supply; + struct mmc_host *host; + + supply = container_of(work, struct mmc_supply, uv_work); + host = container_of(supply, struct mmc_host, supply); + + mmc_handle_undervoltage(host); +} + +static int mmc_handle_regulator_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct mmc_supply *supply = container_of(nb, struct mmc_supply, + vmmc_nb); + struct mmc_host *host = container_of(supply, struct mmc_host, supply); + unsigned long flags; + + switch (event) { + case REGULATOR_EVENT_UNDER_VOLTAGE: + spin_lock_irqsave(&host->lock, flags); + if (host->undervoltage) { + spin_unlock_irqrestore(&host->lock, flags); + return NOTIFY_OK; + } + + host->undervoltage = true; + spin_unlock_irqrestore(&host->lock, flags); + + queue_work(system_highpri_wq, &host->supply.uv_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +/** + * mmc_regulator_register_undervoltage_notifier - Register for undervoltage + * events + * @host: MMC host + * + * To be called by a bus driver when a card supporting graceful shutdown + * is attached. + */ +void mmc_regulator_register_undervoltage_notifier(struct mmc_host *host) +{ + int ret; + + if (IS_ERR_OR_NULL(host->supply.vmmc)) + return; + + host->supply.vmmc_nb.notifier_call = mmc_handle_regulator_event; + ret = regulator_register_notifier(host->supply.vmmc, + &host->supply.vmmc_nb); + if (ret) + dev_warn(mmc_dev(host), "Failed to register vmmc notifier: %d\n", ret); +} + +/** + * mmc_regulator_unregister_undervoltage_notifier - Unregister undervoltage + * notifier + * @host: MMC host + */ +void mmc_regulator_unregister_undervoltage_notifier(struct mmc_host *host) +{ + if (IS_ERR_OR_NULL(host->supply.vmmc)) + return; + + regulator_unregister_notifier(host->supply.vmmc, &host->supply.vmmc_nb); + cancel_work_sync(&host->supply.uv_work); +} + /** * mmc_regulator_get_supply - try to get VMMC and VQMMC regulators for a host * @mmc: the host to regulate diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 5ed5d203de23..e0d935a4ac1d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -337,11 +337,15 @@ struct mmc_slot { struct regulator; struct mmc_pwrseq; +struct notifier_block; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ struct regulator *vqmmc; /* Optional Vccq supply */ struct regulator *vqmmc2; /* Optional supply for phy */ + + struct notifier_block vmmc_nb; /* Notifier for vmmc */ + struct work_struct uv_work; /* Undervoltage work */ }; struct mmc_ctx { @@ -494,6 +498,13 @@ struct mmc_host { unsigned int can_dma_map_merge:1; /* merging can be used */ unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ + /* + * Indicates if an undervoltage event has already been handled. + * This prevents repeated regulator notifiers from triggering + * multiple REGULATOR_EVENT_UNDER_VOLTAGE events. + */ + unsigned int undervoltage:1; /* Undervoltage state */ + int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ -- cgit v1.2.3 From f41345f47fb267a9c95ca710c33448f8d0d81d83 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Aug 2025 15:18:06 +0200 Subject: bpf: Use tnums for JEQ/JNE is_branch_taken logic In the following toy program (reg states minimized for readability), R0 and R1 always have different values at instruction 6. This is obvious when reading the program but cannot be guessed from ranges alone as they overlap (R0 in [0; 0xc0000000], R1 in [1024; 0xc0000400]). 0: call bpf_get_prandom_u32#7 ; R0_w=scalar() 1: w0 = w0 ; R0_w=scalar(var_off=(0x0; 0xffffffff)) 2: r0 >>= 30 ; R0_w=scalar(var_off=(0x0; 0x3)) 3: r0 <<= 30 ; R0_w=scalar(var_off=(0x0; 0xc0000000)) 4: r1 = r0 ; R1_w=scalar(var_off=(0x0; 0xc0000000)) 5: r1 += 1024 ; R1_w=scalar(var_off=(0x400; 0xc0000000)) 6: if r1 != r0 goto pc+1 Looking at tnums however, we can deduce that R1 is always different from R0 because their tnums don't agree on known bits. This patch uses this logic to improve is_scalar_branch_taken in case of BPF_JEQ and BPF_JNE. This change has a tiny impact on complexity, which was measured with the Cilium complexity CI test. That test covers 72 programs with various build and load time configurations for a total of 970 test cases. For 80% of test cases, the patch has no impact. On the other test cases, the patch decreases complexity by only 0.08% on average. In the best case, the verifier needs to walk 3% less instructions and, in the worst case, 1.5% more. Overall, the patch has a small positive impact, especially for our largest programs. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/be3ee70b6e489c49881cb1646114b1d861b5c334.1755694147.git.paul.chaignon@gmail.com --- include/linux/tnum.h | 3 +++ kernel/bpf/tnum.c | 8 ++++++++ kernel/bpf/verifier.c | 4 ++++ 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 57ed3035cc30..0ffb77ffe0e8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -51,6 +51,9 @@ struct tnum tnum_xor(struct tnum a, struct tnum b); /* Multiply two tnums, return @a * @b */ struct tnum tnum_mul(struct tnum a, struct tnum b); +/* Return true if the known bits of both tnums have the same value */ +bool tnum_overlap(struct tnum a, struct tnum b); + /* Return a tnum representing numbers satisfying both @a and @b */ struct tnum tnum_intersect(struct tnum a, struct tnum b); diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index fa353c5d550f..d9328bbb3680 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -143,6 +143,14 @@ struct tnum tnum_mul(struct tnum a, struct tnum b) return tnum_add(TNUM(acc_v, 0), acc_m); } +bool tnum_overlap(struct tnum a, struct tnum b) +{ + u64 mu; + + mu = ~a.mask & ~b.mask; + return (a.value & mu) == (b.value & mu); +} + /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has * a 'known 0' - this will return a 'known 1' for that bit. */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4e47992361ea..5c9dd16b2c56 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15897,6 +15897,8 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta */ if (tnum_is_const(t1) && tnum_is_const(t2)) return t1.value == t2.value; + if (!tnum_overlap(t1, t2)) + return 0; /* non-overlapping ranges */ if (umin1 > umax2 || umax1 < umin2) return 0; @@ -15921,6 +15923,8 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta */ if (tnum_is_const(t1) && tnum_is_const(t2)) return t1.value != t2.value; + if (!tnum_overlap(t1, t2)) + return 1; /* non-overlapping ranges */ if (umin1 > umax2 || umax1 < umin2) return 1; -- cgit v1.2.3 From afa3701c0e45ecb9e4d160048ca4e353c7489948 Mon Sep 17 00:00:00 2001 From: Tiffany Yang Date: Thu, 21 Aug 2025 18:37:52 -0700 Subject: cgroup: cgroup.stat.local time accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There isn't yet a clear way to identify a set of "lost" time that everyone (or at least a wider group of users) cares about. However, users can perform some delay accounting by iterating over components of interest. This patch allows cgroup v2 freezing time to be one of those components. Track the cumulative time that each v2 cgroup spends freezing and expose it to userland via a new local stat file in cgroupfs. Thank you to Michal, who provided the ASCII art in the updated documentation. To access this value: $ mkdir /sys/fs/cgroup/test $ cat /sys/fs/cgroup/test/cgroup.stat.local freeze_time_total 0 Ensure consistent freeze time reads with freeze_seq, a per-cgroup sequence counter. Writes are serialized using the css_set_lock. Signed-off-by: Tiffany Yang Cc: Tejun Heo Cc: Michal Koutný Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 18 ++++++++++++++++++ include/linux/cgroup-defs.h | 17 +++++++++++++++++ kernel/cgroup/cgroup.c | 28 ++++++++++++++++++++++++++++ kernel/cgroup/freezer.c | 16 ++++++++++++---- 4 files changed, 75 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index d9d3cc7df348..9a3a909ee40b 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1001,6 +1001,24 @@ All cgroup core files are prefixed with "cgroup." Total number of dying cgroup subsystems (e.g. memory cgroup) at and beneath the current cgroup. + cgroup.stat.local + A read-only flat-keyed file which exists in non-root cgroups. + The following entry is defined: + + frozen_usec + Cumulative time that this cgroup has spent between freezing and + thawing, regardless of whether by self or ancestor groups. + NB: (not) reaching "frozen" state is not accounted here. + + Using the following ASCII representation of a cgroup's freezer + state, :: + + 1 _____ + frozen 0 __/ \__ + ab cd + + the duration being measured is the span between a and c. + cgroup.freeze A read-write single value file which exists on non-root cgroups. Allowed values are "0" and "1". The default is "0". diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6b93a64115fe..539c64eeef38 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -433,6 +433,23 @@ struct cgroup_freezer_state { * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; + + /* Freeze time data consistency protection */ + seqcount_t freeze_seq; + + /* + * Most recent time the cgroup was requested to freeze. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 freeze_start_nsec; + + /* + * Total duration the cgroup has spent freezing. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 frozen_nsec; }; struct cgroup { diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 312c6a8b55bb..ab096b884bbc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3763,6 +3763,27 @@ static int cgroup_stat_show(struct seq_file *seq, void *v) return 0; } +static int cgroup_core_local_stat_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + unsigned int sequence; + u64 freeze_time; + + do { + sequence = read_seqcount_begin(&cgrp->freezer.freeze_seq); + freeze_time = cgrp->freezer.frozen_nsec; + /* Add in current freezer interval if the cgroup is freezing. */ + if (test_bit(CGRP_FREEZE, &cgrp->flags)) + freeze_time += (ktime_get_ns() - + cgrp->freezer.freeze_start_nsec); + } while (read_seqcount_retry(&cgrp->freezer.freeze_seq, sequence)); + + seq_printf(seq, "frozen_usec %llu\n", + (unsigned long long) freeze_time / NSEC_PER_USEC); + + return 0; +} + #ifdef CONFIG_CGROUP_SCHED /** * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem @@ -5354,6 +5375,11 @@ static struct cftype cgroup_base_files[] = { .name = "cgroup.stat", .seq_show = cgroup_stat_show, }, + { + .name = "cgroup.stat.local", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = cgroup_core_local_stat_show, + }, { .name = "cgroup.freeze", .flags = CFTYPE_NOT_ON_ROOT, @@ -5763,6 +5789,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, * if the parent has to be frozen, the child has too. */ cgrp->freezer.e_freeze = parent->freezer.e_freeze; + seqcount_init(&cgrp->freezer.freeze_seq); if (cgrp->freezer.e_freeze) { /* * Set the CGRP_FREEZE flag, so when a process will be @@ -5771,6 +5798,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, * consider it frozen immediately. */ set_bit(CGRP_FREEZE, &cgrp->flags); + cgrp->freezer.freeze_start_nsec = ktime_get_ns(); set_bit(CGRP_FROZEN, &cgrp->flags); } diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index bf1690a167dd..6c18854bff34 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -171,7 +171,7 @@ static void cgroup_freeze_task(struct task_struct *task, bool freeze) /* * Freeze or unfreeze all tasks in the given cgroup. */ -static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) +static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec) { struct css_task_iter it; struct task_struct *task; @@ -179,10 +179,16 @@ static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - if (freeze) + write_seqcount_begin(&cgrp->freezer.freeze_seq); + if (freeze) { set_bit(CGRP_FREEZE, &cgrp->flags); - else + cgrp->freezer.freeze_start_nsec = ts_nsec; + } else { clear_bit(CGRP_FREEZE, &cgrp->flags); + cgrp->freezer.frozen_nsec += (ts_nsec - + cgrp->freezer.freeze_start_nsec); + } + write_seqcount_end(&cgrp->freezer.freeze_seq); spin_unlock_irq(&css_set_lock); if (freeze) @@ -260,6 +266,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze) struct cgroup *parent; struct cgroup *dsct; bool applied = false; + u64 ts_nsec; bool old_e; lockdep_assert_held(&cgroup_mutex); @@ -271,6 +278,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze) return; cgrp->freezer.freeze = freeze; + ts_nsec = ktime_get_ns(); /* * Propagate changes downwards the cgroup tree. @@ -298,7 +306,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze) /* * Do change actual state: freeze or unfreeze. */ - cgroup_do_freeze(dsct, freeze); + cgroup_do_freeze(dsct, freeze, ts_nsec); applied = true; } -- cgit v1.2.3 From d47cc4dea17391c99b943fa8d70a279e906b2843 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Aug 2025 13:16:15 -0700 Subject: bpf: Use sha1() instead of sha1_transform() in bpf_prog_calc_tag() Now that there's a proper SHA-1 library API, just use that instead of the low-level SHA-1 compression function. This eliminates the need for bpf_prog_calc_tag() to implement the SHA-1 padding itself. No functional change; the computed tags remain the same. Signed-off-by: Eric Biggers Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250811201615.564461-1-ebiggers@kernel.org --- include/linux/filter.h | 6 ------ kernel/bpf/core.c | 50 +++++++++----------------------------------------- 2 files changed, 9 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c0a74fb9fcb1..9092d8ea95c8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -997,12 +997,6 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) return prog->len * sizeof(struct bpf_insn); } -static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) -{ - return round_up(bpf_prog_insn_size(prog) + - sizeof(__be64) + 1, SHA1_BLOCK_SIZE); -} - static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5d1650af899d..ef01cc644a96 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -293,28 +294,19 @@ void __bpf_prog_free(struct bpf_prog *fp) int bpf_prog_calc_tag(struct bpf_prog *fp) { - const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64); - u32 raw_size = bpf_prog_tag_scratch_size(fp); - u32 digest[SHA1_DIGEST_WORDS]; - u32 ws[SHA1_WORKSPACE_WORDS]; - u32 i, bsize, psize, blocks; + size_t size = bpf_prog_insn_size(fp); + u8 digest[SHA1_DIGEST_SIZE]; struct bpf_insn *dst; bool was_ld_map; - u8 *raw, *todo; - __be32 *result; - __be64 *bits; + u32 i; - raw = vmalloc(raw_size); - if (!raw) + dst = vmalloc(size); + if (!dst) return -ENOMEM; - sha1_init_raw(digest); - memset(ws, 0, sizeof(ws)); - /* We need to take out the map fd for the digest calculation * since they are unstable from user space side. */ - dst = (void *)raw; for (i = 0, was_ld_map = false; i < fp->len; i++) { dst[i] = fp->insnsi[i]; if (!was_ld_map && @@ -334,33 +326,9 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) was_ld_map = false; } } - - psize = bpf_prog_insn_size(fp); - memset(&raw[psize], 0, raw_size - psize); - raw[psize++] = 0x80; - - bsize = round_up(psize, SHA1_BLOCK_SIZE); - blocks = bsize / SHA1_BLOCK_SIZE; - todo = raw; - if (bsize - psize >= sizeof(__be64)) { - bits = (__be64 *)(todo + bsize - sizeof(__be64)); - } else { - bits = (__be64 *)(todo + bsize + bits_offset); - blocks++; - } - *bits = cpu_to_be64((psize - 1) << 3); - - while (blocks--) { - sha1_transform(digest, todo, ws); - todo += SHA1_BLOCK_SIZE; - } - - result = (__force __be32 *)digest; - for (i = 0; i < SHA1_DIGEST_WORDS; i++) - result[i] = cpu_to_be32(digest[i]); - memcpy(fp->tag, result, sizeof(fp->tag)); - - vfree(raw); + sha1((const u8 *)dst, size, digest); + memcpy(fp->tag, digest, sizeof(fp->tag)); + vfree(dst); return 0; } -- cgit v1.2.3 From 3c716487936aa54083c130d46ad5747769695e09 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 14 Aug 2025 18:59:49 +0200 Subject: genirq: Remove GENERIC_IRQ_LEGACY IA64 is gone and with it the last GENERIC_IRQ_LEGACY user. Remove GENERIC_IRQ_LEGACY. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250814165949.hvtP03r4@linutronix.de --- include/linux/irq.h | 4 ---- kernel/irq/Kconfig | 4 ---- kernel/irq/irqdesc.c | 7 ------- 3 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1d6b606a81ef..c9bcdbf6bc63 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -976,10 +976,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq); -#endif - /** * struct irq_chip_regs - register offsets for struct irq_gci * @enable: Enable register offset to reg_base diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 1da5e9d9da71..36673640c4fc 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -6,10 +6,6 @@ menu "IRQ subsystem" config MAY_HAVE_SPARSE_IRQ bool -# Legacy support, required for itanic -config GENERIC_IRQ_LEGACY - bool - # Enable the generic irq autoprobe mechanism config GENERIC_IRQ_PROBE bool diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index b64c57b44c20..db714d3014b5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -653,13 +653,6 @@ void irq_mark_irq(unsigned int irq) irq_insert_desc(irq, irq_desc + irq); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq) -{ - free_desc(irq); -} -#endif - #endif /* !CONFIG_SPARSE_IRQ */ int handle_irq_desc(struct irq_desc *desc) -- cgit v1.2.3 From 7a721a2fee2bce01af26699a87739db8ca8ea3c8 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 14 Aug 2025 07:28:31 +0800 Subject: genirq: Add irq_chip_(startup/shutdown)_parent() As the MSI controller on SG2044 uses PLIC as the underlying interrupt controller, it needs to call irq_enable() and irq_disable() to startup/shutdown interrupts. Otherwise, the MSI interrupt can not be startup correctly and will not respond any incoming interrupt. Introduce irq_chip_startup_parent() and irq_chip_shutdown_parent() to allow the interrupt controller to call the irq_startup()/irq_shutdown() callbacks of the parent interrupt chip. In case the irq_startup()/irq_shutdown() callbacks are not implemented for the parent interrupt chip, this will fallback to irq_chip_enable_parent() or irq_chip_disable_parent(). Suggested-by: Thomas Gleixner Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Tested-by: Chen Wang # Pioneerbox Reviewed-by: Chen Wang Link: https://lore.kernel.org/all/20250813232835.43458-2-inochiama@gmail.com Link: https://lore.kernel.org/lkml/20250722224513.22125-1-inochiama@gmail.com/ --- include/linux/irq.h | 2 ++ kernel/irq/chip.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c9bcdbf6bc63..c67e76fbcc07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data, extern int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); +extern void irq_chip_shutdown_parent(struct irq_data *data); +extern unsigned int irq_chip_startup_parent(struct irq_data *data); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 0d0276378c70..3ffa0d80ddd1 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1259,6 +1259,43 @@ int irq_chip_get_parent_state(struct irq_data *data, } EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); +/** + * irq_chip_shutdown_parent - Shutdown the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_shutdown() callback of the parent if available or falls + * back to irq_chip_disable_parent(). + */ +void irq_chip_shutdown_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_shutdown) + parent->chip->irq_shutdown(parent); + else + irq_chip_disable_parent(data); +} +EXPORT_SYMBOL_GPL(irq_chip_shutdown_parent); + +/** + * irq_chip_startup_parent - Startup the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_startup() callback of the parent if available or falls + * back to irq_chip_enable_parent(). + */ +unsigned int irq_chip_startup_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_startup) + return parent->chip->irq_startup(parent); + + irq_chip_enable_parent(data); + return 0; +} +EXPORT_SYMBOL_GPL(irq_chip_startup_parent); + /** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) -- cgit v1.2.3 From 54f45a30c0d0153d2be091ba2d683ab6db6d1d5b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 14 Aug 2025 07:28:32 +0800 Subject: PCI/MSI: Add startup/shutdown for per device domains As the RISC-V PLIC cannot apply affinity settings without invoking irq_enable(), it will make the interrupt unavailble when used as an underlying interrupt chip for the MSI controller. Implement the irq_startup() and irq_shutdown() callbacks for the PCI MSI and MSI-X templates. For chips that specify MSI_FLAG_PCI_MSI_STARTUP_PARENT, the parent startup and shutdown functions are invoked. That allows the interrupt on the parent chip to be enabled if the interrupt has not been enabled during allocation. This is necessary for MSI controllers which use PLIC as underlying parent interrupt chip. Suggested-by: Thomas Gleixner Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Tested-by: Chen Wang # Pioneerbox Reviewed-by: Chen Wang Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/all/20250813232835.43458-3-inochiama@gmail.com --- drivers/pci/msi/irqdomain.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/msi.h | 2 ++ 2 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 0938ef7ebabf..e0a800f918e8 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -148,6 +148,23 @@ static void pci_device_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *d arg->hwirq = desc->msi_index; } +static void cond_shutdown_parent(struct irq_data *data) +{ + struct msi_domain_info *info = data->domain->host_data; + + if (unlikely(info->flags & MSI_FLAG_PCI_MSI_STARTUP_PARENT)) + irq_chip_shutdown_parent(data); +} + +static unsigned int cond_startup_parent(struct irq_data *data) +{ + struct msi_domain_info *info = data->domain->host_data; + + if (unlikely(info->flags & MSI_FLAG_PCI_MSI_STARTUP_PARENT)) + return irq_chip_startup_parent(data); + return 0; +} + static __always_inline void cond_mask_parent(struct irq_data *data) { struct msi_domain_info *info = data->domain->host_data; @@ -164,6 +181,23 @@ static __always_inline void cond_unmask_parent(struct irq_data *data) irq_chip_unmask_parent(data); } +static void pci_irq_shutdown_msi(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + + pci_msi_mask(desc, BIT(data->irq - desc->irq)); + cond_shutdown_parent(data); +} + +static unsigned int pci_irq_startup_msi(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + unsigned int ret = cond_startup_parent(data); + + pci_msi_unmask(desc, BIT(data->irq - desc->irq)); + return ret; +} + static void pci_irq_mask_msi(struct irq_data *data) { struct msi_desc *desc = irq_data_get_msi_desc(data); @@ -194,6 +228,8 @@ static void pci_irq_unmask_msi(struct irq_data *data) static const struct msi_domain_template pci_msi_template = { .chip = { .name = "PCI-MSI", + .irq_startup = pci_irq_startup_msi, + .irq_shutdown = pci_irq_shutdown_msi, .irq_mask = pci_irq_mask_msi, .irq_unmask = pci_irq_unmask_msi, .irq_write_msi_msg = pci_msi_domain_write_msg, @@ -210,6 +246,20 @@ static const struct msi_domain_template pci_msi_template = { }, }; +static void pci_irq_shutdown_msix(struct irq_data *data) +{ + pci_msix_mask(irq_data_get_msi_desc(data)); + cond_shutdown_parent(data); +} + +static unsigned int pci_irq_startup_msix(struct irq_data *data) +{ + unsigned int ret = cond_startup_parent(data); + + pci_msix_unmask(irq_data_get_msi_desc(data)); + return ret; +} + static void pci_irq_mask_msix(struct irq_data *data) { pci_msix_mask(irq_data_get_msi_desc(data)); @@ -234,6 +284,8 @@ EXPORT_SYMBOL_GPL(pci_msix_prepare_desc); static const struct msi_domain_template pci_msix_template = { .chip = { .name = "PCI-MSIX", + .irq_startup = pci_irq_startup_msix, + .irq_shutdown = pci_irq_shutdown_msix, .irq_mask = pci_irq_mask_msix, .irq_unmask = pci_irq_unmask_msix, .irq_write_msi_msg = pci_msi_domain_write_msg, diff --git a/include/linux/msi.h b/include/linux/msi.h index e5e86a8529fb..3111ba95fbde 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -568,6 +568,8 @@ enum { MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Support for parent mask/unmask */ MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9), + /* Support for parent startup/shutdown */ + MSI_FLAG_PCI_MSI_STARTUP_PARENT = (1 << 10), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), -- cgit v1.2.3 From 92a96b0a227e91dc42475265a1ce766b6cd044fa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 17 Aug 2025 23:09:18 +0100 Subject: io_uring: add request poisoning Poison various request fields on free. __io_req_caches_free() is a slow path, so can be done unconditionally, but gate it on kasan for io_req_add_to_cache(). Note that some fields are logically retained between cache allocations and can't be poisoned in io_req_add_to_cache(). Ideally, it'd be replaced with KASAN'ed caches, but that can't be enabled because of some synchronisation nuances. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7a78e8a7f5be434313c400650b862e36c211b312.1755459452.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/poison.h | 3 +++ io_uring/io_uring.c | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 8ca2235f78d5..299e2dd7da6d 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -90,4 +90,7 @@ /********** lib/stackdepot.c **********/ #define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) +/********** io_uring/ **********/ +#define IO_URING_PTR_POISON ((void *)(0x1091UL + POISON_POINTER_DELTA)) + #endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 93633613a165..e511949086dd 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -179,6 +179,26 @@ static const struct ctl_table kernel_io_uring_disabled_table[] = { }; #endif +static void io_poison_cached_req(struct io_kiocb *req) +{ + req->ctx = IO_URING_PTR_POISON; + req->tctx = IO_URING_PTR_POISON; + req->file = IO_URING_PTR_POISON; + req->creds = IO_URING_PTR_POISON; + req->io_task_work.func = IO_URING_PTR_POISON; + req->apoll = IO_URING_PTR_POISON; +} + +static void io_poison_req(struct io_kiocb *req) +{ + io_poison_cached_req(req); + req->async_data = IO_URING_PTR_POISON; + req->kbuf = IO_URING_PTR_POISON; + req->comp_list.next = IO_URING_PTR_POISON; + req->file_node = IO_URING_PTR_POISON; + req->link = IO_URING_PTR_POISON; +} + static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) { return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); @@ -235,6 +255,8 @@ static inline void req_fail_link_node(struct io_kiocb *req, int res) static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx) { + if (IS_ENABLED(CONFIG_KASAN)) + io_poison_cached_req(req); wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list); } @@ -2767,6 +2789,7 @@ static __cold void __io_req_caches_free(struct io_ring_ctx *ctx) while (!io_req_cache_empty(ctx)) { req = io_extract_req(ctx); + io_poison_req(req); kmem_cache_free(req_cachep, req); nr++; } -- cgit v1.2.3 From 5fda51255439addd1c9059098e30847a375a1008 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Aug 2025 20:03:39 -0600 Subject: io_uring/kbuf: switch to storing struct io_buffer_list locally Currently the buffer list is stored in struct io_kiocb. The buffer list can be of two types: 1) Classic/legacy buffer list. These don't need to get referenced after a buffer pick, and hence storing them in struct io_kiocb is perfectly fine. 2) Ring provided buffer lists. These DO need to be referenced after the initial buffer pick, as they need to get consumed later on. This can be either just incrementing the head of the ring, or it can be consuming parts of a buffer if incremental buffer consumptions has been configured. For case 2, io_uring needs to be careful not to access the buffer list after the initial pick-and-execute context. The core does recycling of these, but it's easy to make a mistake, because it's stored in the io_kiocb which does persist across multiple execution contexts. Either because it's a multishot request, or simply because it needed some kind of async trigger (eg poll) for retry purposes. Add a struct io_buffer_list to struct io_br_sel, which is always on stack for the various users of it. This prevents the buffer list from leaking outside of that execution context, and additionally it enables kbuf to not even pass back the struct io_buffer_list if the given context isn't appropriately locked already. This doesn't fix any bugs, it's simply a defensive measure to prevent any issues with reuse of a buffer list. Link: https://lore.kernel.org/r/20250821020750.598432-12-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ------ io_uring/io_uring.c | 6 +++--- io_uring/kbuf.c | 27 ++++++++++++++----------- io_uring/kbuf.h | 16 ++++++--------- io_uring/net.c | 46 ++++++++++++++++-------------------------- io_uring/poll.c | 6 +++--- io_uring/rw.c | 22 ++++++++++---------- 7 files changed, 55 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 80a178f3d896..1d33984611bc 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -674,12 +674,6 @@ struct io_kiocb { /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ struct io_buffer *kbuf; - /* - * stores buffer ID for ring provided buffers, valid IFF - * REQ_F_BUFFER_RING is set. - */ - struct io_buffer_list *buf_list; - struct io_rsrc_node *buf_node; }; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 27bc1486f07b..985b4681e513 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1007,7 +1007,7 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res) lockdep_assert_held(&req->ctx->uring_lock); req_set_fail(req); - io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list)); + io_req_set_res(req, res, io_put_kbuf(req, res, NULL)); if (def->fail) def->fail(req); io_req_complete_defer(req); @@ -2025,11 +2025,11 @@ fail: switch (io_arm_poll_handler(req, 0)) { case IO_APOLL_READY: - io_kbuf_recycle(req, req->buf_list, 0); + io_kbuf_recycle(req, NULL, 0); io_req_task_queue(req); break; case IO_APOLL_ABORTED: - io_kbuf_recycle(req, req->buf_list, 0); + io_kbuf_recycle(req, NULL, 0); io_queue_iowq(req); break; case IO_APOLL_OK: diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 21c12c437ab9..3e9aab21af9d 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -171,8 +171,8 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, if (*len == 0 || *len > buf->len) *len = buf->len; req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; - req->buf_list = bl; req->buf_index = buf->bid; + sel.buf_list = bl; sel.addr = u64_to_user_ptr(buf->addr); if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { @@ -186,8 +186,8 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, * the transfer completes (or if we get -EAGAIN and must poll of * retry). */ - io_kbuf_commit(req, bl, *len, 1); - req->buf_list = NULL; + io_kbuf_commit(req, sel.buf_list, *len, 1); + sel.buf_list = NULL; } return sel; } @@ -294,7 +294,6 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, req->flags |= REQ_F_BL_EMPTY; req->flags |= REQ_F_BUFFER_RING; - req->buf_list = bl; return iov - arg->iovs; } @@ -302,16 +301,15 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, struct io_br_sel *sel, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; - struct io_buffer_list *bl; int ret = -ENOENT; io_ring_submit_lock(ctx, issue_flags); - bl = io_buffer_get_list(ctx, arg->buf_group); - if (unlikely(!bl)) + sel->buf_list = io_buffer_get_list(ctx, arg->buf_group); + if (unlikely(!sel->buf_list)) goto out_unlock; - if (bl->flags & IOBL_BUF_RING) { - ret = io_ring_buffers_peek(req, arg, bl); + if (sel->buf_list->flags & IOBL_BUF_RING) { + ret = io_ring_buffers_peek(req, arg, sel->buf_list); /* * Don't recycle these buffers if we need to go through poll. * Nobody else can use them anyway, and holding on to provided @@ -321,13 +319,16 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, */ if (ret > 0) { req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE; - io_kbuf_commit(req, bl, arg->out_len, ret); + io_kbuf_commit(req, sel->buf_list, arg->out_len, ret); } } else { - ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs); + ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs); } out_unlock: - io_ring_submit_unlock(ctx, issue_flags); + if (issue_flags & IO_URING_F_UNLOCKED) { + sel->buf_list = NULL; + mutex_unlock(&ctx->uring_lock); + } return ret; } @@ -348,10 +349,12 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, ret = io_ring_buffers_peek(req, arg, bl); if (ret > 0) req->flags |= REQ_F_BUFFERS_COMMIT; + sel->buf_list = bl; return ret; } /* don't support multiple buffer selections for legacy */ + sel->buf_list = NULL; return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); } diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index b1723c2620da..1a539969fc9c 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -63,11 +63,14 @@ struct buf_sel_arg { }; /* - * Return value from io_buffer_list selection. Just returns the error or - * user address for now, will be extended to return the buffer list in the - * future. + * Return value from io_buffer_list selection, to avoid stashing it in + * struct io_kiocb. For legacy/classic provided buffers, keeping a reference + * across execution contexts are fine. But for ring provided buffers, the + * list may go away as soon as ->uring_lock is dropped. As the io_kiocb + * persists, it's better to just keep the buffer local for those cases. */ struct io_br_sel { + struct io_buffer_list *buf_list; /* * Some selection parts return the user address, others return an error. */ @@ -107,13 +110,6 @@ struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, static inline bool io_kbuf_recycle_ring(struct io_kiocb *req, struct io_buffer_list *bl) { - /* - * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear - * the flag and hence ensure that bl->head doesn't get incremented. - * If the tail has already been incremented, hang on to it. - * The exception is partial io, that case we should increment bl->head - * to monopolize the buffer. - */ if (bl) { req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT); return true; diff --git a/io_uring/net.c b/io_uring/net.c index 4eb208d24169..b00cd2f59091 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -433,7 +433,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->opcode == IORING_OP_SENDMSG) return -EINVAL; sr->msg_flags |= MSG_WAITALL; - req->buf_list = NULL; req->flags |= REQ_F_MULTISHOT; } @@ -512,11 +511,11 @@ static inline bool io_send_finish(struct io_kiocb *req, unsigned int cflags; if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { - cflags = io_put_kbuf(req, sel->val, req->buf_list); + cflags = io_put_kbuf(req, sel->val, sel->buf_list); goto finish; } - cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val)); + cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val)); if (bundle_finished || req->flags & REQ_F_BL_EMPTY) goto finish; @@ -657,6 +656,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) flags |= MSG_DONTWAIT; retry_bundle: + sel.buf_list = NULL; if (io_do_buffer_select(req)) { ret = io_send_select_buffer(req, issue_flags, &sel, kmsg); if (ret) @@ -682,7 +682,7 @@ retry_bundle: sr->len -= ret; sr->buf += ret; sr->done_io += ret; - return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); + return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -795,18 +795,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= REQ_F_NOWAIT; if (sr->msg_flags & MSG_ERRQUEUE) req->flags |= REQ_F_CLEAR_POLLIN; - if (req->flags & REQ_F_BUFFER_SELECT) { - /* - * Store the buffer group for this multishot receive separately, - * as if we end up doing an io-wq based issue that selects a - * buffer, it has to be committed immediately and that will - * clear ->buf_list. This means we lose the link to the buffer - * list, and the eventual buffer put on completion then cannot - * restore it. - */ + if (req->flags & REQ_F_BUFFER_SELECT) sr->buf_group = req->buf_index; - req->buf_list = NULL; - } sr->mshot_total_len = sr->mshot_len = 0; if (sr->flags & IORING_RECV_MULTISHOT) { if (!(req->flags & REQ_F_BUFFER_SELECT)) @@ -874,7 +864,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, if (sr->flags & IORING_RECVSEND_BUNDLE) { size_t this_ret = sel->val - sr->done_io; - cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret)); + cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret)); if (sr->flags & IORING_RECV_RETRY) cflags = req->cqe.flags | (cflags & CQE_F_MASK); if (sr->mshot_len && sel->val >= sr->mshot_len) @@ -896,7 +886,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, return false; } } else { - cflags |= io_put_kbuf(req, sel->val, req->buf_list); + cflags |= io_put_kbuf(req, sel->val, sel->buf_list); } /* @@ -1038,6 +1028,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) flags |= MSG_DONTWAIT; retry_multishot: + sel.buf_list = NULL; if (io_do_buffer_select(req)) { size_t len = sr->len; @@ -1048,7 +1039,7 @@ retry_multishot: if (req->flags & REQ_F_APOLL_MULTISHOT) { ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len); if (ret) { - io_kbuf_recycle(req, req->buf_list, issue_flags); + io_kbuf_recycle(req, sel.buf_list, issue_flags); return ret; } } @@ -1072,14 +1063,12 @@ retry_multishot: if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { - if (issue_flags & IO_URING_F_MULTISHOT) - io_kbuf_recycle(req, req->buf_list, issue_flags); - + io_kbuf_recycle(req, sel.buf_list, issue_flags); return IOU_RETRY; } if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; - return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); + return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1093,7 +1082,7 @@ retry_multishot: else if (sr->done_io) ret = sr->done_io; else - io_kbuf_recycle(req, req->buf_list, issue_flags); + io_kbuf_recycle(req, sel.buf_list, issue_flags); sel.val = ret; if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) @@ -1178,7 +1167,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *kmsg = req->async_data; - struct io_br_sel sel = { }; + struct io_br_sel sel; struct socket *sock; unsigned flags; int ret, min_ret = 0; @@ -1198,6 +1187,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) flags |= MSG_DONTWAIT; retry_multishot: + sel.buf_list = NULL; if (io_do_buffer_select(req)) { sel.val = sr->len; ret = io_recv_buf_select(req, kmsg, &sel, issue_flags); @@ -1217,16 +1207,14 @@ retry_multishot: ret = sock_recvmsg(sock, &kmsg->msg, flags); if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { - if (issue_flags & IO_URING_F_MULTISHOT) - io_kbuf_recycle(req, req->buf_list, issue_flags); - + io_kbuf_recycle(req, sel.buf_list, issue_flags); return IOU_RETRY; } if (ret > 0 && io_net_retry(sock, flags)) { sr->len -= ret; sr->buf += ret; sr->done_io += ret; - return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); + return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1242,7 +1230,7 @@ out_free: else if (sr->done_io) ret = sr->done_io; else - io_kbuf_recycle(req, req->buf_list, issue_flags); + io_kbuf_recycle(req, sel.buf_list, issue_flags); sel.val = ret; if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) diff --git a/io_uring/poll.c b/io_uring/poll.c index 07ab22380c78..f3852bf7627b 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -316,10 +316,10 @@ void io_poll_task_func(struct io_kiocb *req, io_tw_token_t tw) ret = io_poll_check_events(req, tw); if (ret == IOU_POLL_NO_ACTION) { - io_kbuf_recycle(req, req->buf_list, 0); + io_kbuf_recycle(req, NULL, 0); return; } else if (ret == IOU_POLL_REQUEUE) { - io_kbuf_recycle(req, req->buf_list, 0); + io_kbuf_recycle(req, NULL, 0); __io_poll_execute(req, 0); return; } @@ -686,7 +686,7 @@ int io_arm_apoll(struct io_kiocb *req, unsigned issue_flags, __poll_t mask) req->flags |= REQ_F_POLLED; ipt.pt._qproc = io_async_queue_proc; - io_kbuf_recycle(req, req->buf_list, issue_flags); + io_kbuf_recycle(req, NULL, issue_flags); ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags); if (ret) diff --git a/io_uring/rw.c b/io_uring/rw.c index 2b106f644383..906e869d330a 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -579,7 +579,7 @@ void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw) io_req_io_end(req); if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) - req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list); + req->cqe.flags |= io_put_kbuf(req, req->cqe.res, NULL); io_req_rw_cleanup(req, 0); io_req_task_complete(req, tw); @@ -648,7 +648,7 @@ static inline void io_rw_done(struct io_kiocb *req, ssize_t ret) } static int kiocb_done(struct io_kiocb *req, ssize_t ret, - unsigned int issue_flags) + struct io_br_sel *sel, unsigned int issue_flags) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned final_ret = io_fixup_rw_res(req, ret); @@ -662,7 +662,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, * from the submission path. */ io_req_io_end(req); - io_req_set_res(req, final_ret, io_put_kbuf(req, ret, req->buf_list)); + io_req_set_res(req, final_ret, io_put_kbuf(req, ret, sel->buf_list)); io_req_rw_cleanup(req, issue_flags); return IOU_COMPLETE; } else { @@ -1024,10 +1024,10 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags) ret = __io_read(req, &sel, issue_flags); if (ret >= 0) - return kiocb_done(req, ret, issue_flags); + return kiocb_done(req, ret, &sel, issue_flags); if (req->flags & REQ_F_BUFFERS_COMMIT) - io_kbuf_recycle(req, req->buf_list, issue_flags); + io_kbuf_recycle(req, sel.buf_list, issue_flags); return ret; } @@ -1057,15 +1057,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * Reset rw->len to 0 again to avoid clamping future mshot * reads, in case the buffer size varies. */ - if (io_kbuf_recycle(req, req->buf_list, issue_flags)) + if (io_kbuf_recycle(req, sel.buf_list, issue_flags)) rw->len = 0; return IOU_RETRY; } else if (ret <= 0) { - io_kbuf_recycle(req, req->buf_list, issue_flags); + io_kbuf_recycle(req, sel.buf_list, issue_flags); if (ret < 0) req_set_fail(req); } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { - cflags = io_put_kbuf(req, ret, req->buf_list); + cflags = io_put_kbuf(req, ret, sel.buf_list); } else { /* * Any successful return value will keep the multishot read @@ -1073,7 +1073,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * we fail to post a CQE, or multishot is no longer set, then * jump to the termination path. This request is then done. */ - cflags = io_put_kbuf(req, ret, req->buf_list); + cflags = io_put_kbuf(req, ret, sel.buf_list); rw->len = 0; /* similarly to above, reset len to 0 */ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { @@ -1202,7 +1202,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; } done: - return kiocb_done(req, ret2, issue_flags); + return kiocb_done(req, ret2, NULL, issue_flags); } else { ret_eagain: iov_iter_restore(&io->iter, &io->iter_state); @@ -1370,7 +1370,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) if (!smp_load_acquire(&req->iopoll_completed)) break; nr_events++; - req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list); + req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL); if (req->opcode != IORING_OP_URING_CMD) io_req_rw_cleanup(req, 0); } -- cgit v1.2.3 From d589bcddaa3f8b1668499c3f0466863df3abe37a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Aug 2025 12:02:06 +0800 Subject: io-uring: move `struct io_br_sel` into io_uring_types.h Move `struct io_br_sel` into io_uring_types.h and prepare for supporting provided buffer on uring_cmd. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250821040210.1152145-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 19 +++++++++++++++++++ io_uring/kbuf.h | 18 ------------------ 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 1d33984611bc..9c6c548f43f5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -85,6 +85,25 @@ struct io_mapped_region { unsigned flags; }; +/* + * Return value from io_buffer_list selection, to avoid stashing it in + * struct io_kiocb. For legacy/classic provided buffers, keeping a reference + * across execution contexts are fine. But for ring provided buffers, the + * list may go away as soon as ->uring_lock is dropped. As the io_kiocb + * persists, it's better to just keep the buffer local for those cases. + */ +struct io_br_sel { + struct io_buffer_list *buf_list; + /* + * Some selection parts return the user address, others return an error. + */ + union { + void __user *addr; + ssize_t val; + }; +}; + + /* * Arbitrary limit, can be raised if need be */ diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 32f73adbe1e9..ada382ff38d7 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -62,24 +62,6 @@ struct buf_sel_arg { unsigned short partial_map; }; -/* - * Return value from io_buffer_list selection, to avoid stashing it in - * struct io_kiocb. For legacy/classic provided buffers, keeping a reference - * across execution contexts are fine. But for ring provided buffers, the - * list may go away as soon as ->uring_lock is dropped. As the io_kiocb - * persists, it's better to just keep the buffer local for those cases. - */ -struct io_br_sel { - struct io_buffer_list *buf_list; - /* - * Some selection parts return the user address, others return an error. - */ - union { - void __user *addr; - ssize_t val; - }; -}; - struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, unsigned buf_group, unsigned int issue_flags); int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, -- cgit v1.2.3 From 620a50c927004f5c9420a7ca9b1a55673dbf3941 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Aug 2025 12:02:07 +0800 Subject: io_uring: uring_cmd: add multishot support Add UAPI flag IORING_URING_CMD_MULTISHOT for supporting multishot uring_cmd operations with provided buffer. This enables drivers to post multiple completion events from a single uring_cmd submission, which is useful for: - Notifying userspace of device events (e.g., interrupt handling) - Supporting devices with multiple event sources (e.g., multi-queue devices) - Avoiding the need for device poll() support when events originate from multiple sources device-wide The implementation adds two new APIs: - io_uring_cmd_select_buffer(): selects a buffer from the provided buffer group for multishot uring_cmd - io_uring_mshot_cmd_post_cqe(): posts a CQE after event data is pushed to the provided buffer Multishot uring_cmd must be used with buffer select (IOSQE_BUFFER_SELECT) and is mutually exclusive with IORING_URING_CMD_FIXED for now. The ublk driver will be the first user of this functionality: https://github.com/ming1/linux/commits/ublk-devel/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250821040210.1152145-3-ming.lei@redhat.com [axboe: fold in fix for !CONFIG_IO_URING] Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 26 ++++++++++++++++ include/uapi/linux/io_uring.h | 6 +++- io_uring/opdef.c | 1 + io_uring/uring_cmd.c | 71 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 102 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index cfa6d0c0c322..4bd3a7339243 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -70,6 +70,21 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, /* Execute the request from a blocking context */ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); +/* + * Select a buffer from the provided buffer group for multishot uring_cmd. + * Returns the selected buffer address and size. + */ +struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, + unsigned buf_group, size_t *len, + unsigned int issue_flags); + +/* + * Complete a multishot uring_cmd event. This will post a CQE to the completion + * queue and update the provided buffer. + */ +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + struct io_br_sel *sel, unsigned int issue_flags); + #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, @@ -102,6 +117,17 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) { } +static inline struct io_br_sel +io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, + size_t *len, unsigned int issue_flags) +{ + return (struct io_br_sel) { .val = -EOPNOTSUPP }; +} +static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + ssize_t ret, unsigned int issue_flags) +{ + return true; +} #endif /* diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6957dc539d83..1e935f8901c5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -298,9 +298,13 @@ enum io_uring_op { * sqe->uring_cmd_flags top 8bits aren't available for userspace * IORING_URING_CMD_FIXED use registered buffer; pass this flag * along with setting sqe->buf_index. + * IORING_URING_CMD_MULTISHOT must be used with buffer select, like other + * multishot commands. Not compatible with + * IORING_URING_CMD_FIXED, for now. */ #define IORING_URING_CMD_FIXED (1U << 0) -#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED +#define IORING_URING_CMD_MULTISHOT (1U << 1) +#define IORING_URING_CMD_MASK (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT) /* diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 9568785810d9..932319633eac 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -413,6 +413,7 @@ const struct io_issue_def io_issue_defs[] = { #endif }, [IORING_OP_URING_CMD] = { + .buffer_select = 1, .needs_file = 1, .plug = 1, .iopoll = 1, diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 053bac89b6c0..3cfb5d51b88a 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -11,6 +11,7 @@ #include "io_uring.h" #include "alloc_cache.h" #include "rsrc.h" +#include "kbuf.h" #include "uring_cmd.h" #include "poll.h" @@ -194,8 +195,21 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ioucmd->flags & ~IORING_URING_CMD_MASK) return -EINVAL; - if (ioucmd->flags & IORING_URING_CMD_FIXED) + if (ioucmd->flags & IORING_URING_CMD_FIXED) { + if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) + return -EINVAL; req->buf_index = READ_ONCE(sqe->buf_index); + } + + if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) { + if (ioucmd->flags & IORING_URING_CMD_FIXED) + return -EINVAL; + if (!(req->flags & REQ_F_BUFFER_SELECT)) + return -EINVAL; + } else { + if (req->flags & REQ_F_BUFFER_SELECT) + return -EINVAL; + } ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); @@ -251,6 +265,10 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) } ret = file->f_op->uring_cmd(ioucmd, issue_flags); + if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) { + if (ret >= 0) + return IOU_ISSUE_SKIP_COMPLETE; + } if (ret == -EAGAIN) { ioucmd->flags |= IORING_URING_CMD_REISSUE; return ret; @@ -333,3 +351,54 @@ bool io_uring_cmd_post_mshot_cqe32(struct io_uring_cmd *cmd, return false; return io_req_post_cqe32(req, cqe); } + +/* + * Work with io_uring_mshot_cmd_post_cqe() together for committing the + * provided buffer upfront + */ +struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, + unsigned buf_group, size_t *len, + unsigned int issue_flags) +{ + struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); + + if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT)) + return (struct io_br_sel) { .val = -EINVAL }; + + if (WARN_ON_ONCE(!io_do_buffer_select(req))) + return (struct io_br_sel) { .val = -EINVAL }; + + return io_buffer_select(req, len, buf_group, issue_flags); +} +EXPORT_SYMBOL_GPL(io_uring_cmd_buffer_select); + +/* + * Return true if this multishot uring_cmd needs to be completed, otherwise + * the event CQE is posted successfully. + * + * This function must use `struct io_br_sel` returned from + * io_uring_cmd_buffer_select() for committing the buffer in the same + * uring_cmd submission context. + */ +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + struct io_br_sel *sel, unsigned int issue_flags) +{ + struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); + unsigned int cflags = 0; + + if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT)) + return true; + + if (sel->val > 0) { + cflags = io_put_kbuf(req, sel->val, sel->buf_list); + if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) + return false; + } + + io_kbuf_recycle(req, sel->buf_list, issue_flags); + if (sel->val < 0) + req_set_fail(req); + io_req_set_res(req, sel->val, cflags); + return true; +} +EXPORT_SYMBOL_GPL(io_uring_mshot_cmd_post_cqe); -- cgit v1.2.3 From d0201c4436c53412146d526855c585fa9d54ca13 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:01:46 -0600 Subject: io_uring: remove io_ctx_cqe32() helper It's pretty pointless and only used for the tracing helper, get rid of it. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ------ include/trace/events/io_uring.h | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 9c6c548f43f5..d1e25f3fe0b3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -740,10 +740,4 @@ struct io_overflow_cqe { struct list_head list; struct io_uring_cqe cqe; }; - -static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx) -{ - return ctx->flags & IORING_SETUP_CQE32; -} - #endif diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 178ab6f611be..6a970625a3ea 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -340,8 +340,8 @@ TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe), __entry->user_data = cqe->user_data; __entry->res = cqe->res; __entry->cflags = cqe->flags; - __entry->extra1 = io_ctx_cqe32(ctx) ? cqe->big_cqe[0] : 0; - __entry->extra2 = io_ctx_cqe32(ctx) ? cqe->big_cqe[1] : 0; + __entry->extra1 = ctx->flags & IORING_SETUP_CQE32 ? cqe->big_cqe[0] : 0; + __entry->extra2 = ctx->flags & IORING_SETUP_CQE32 ? cqe->big_cqe[1] : 0; ), TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x " -- cgit v1.2.3 From 6e376f245f19feeadddafb2c3fa5fbd6469ecdfe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Aug 2025 11:48:42 +0200 Subject: gpio: generic: provide to_gpio_generic_chip() Provide a helper allowing to convert a struct gpio_chip address to the struct gpio_generic_chip that wraps it. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250825-gpio-mmio-gpio-conv-v1-1-356b4b1d5110@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index f3a8db4598bb..5a85ecbef8d2 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -55,6 +55,12 @@ struct gpio_generic_chip { struct gpio_chip gc; }; +static inline struct gpio_generic_chip * +to_gpio_generic_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct gpio_generic_chip, gc); +} + /** * gpio_generic_chip_init() - Initialize a generic GPIO chip. * @chip: Generic GPIO chip to set up. -- cgit v1.2.3 From 16397871b6e35fa46a2bec27b3558f93b050c6fc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Aug 2025 11:48:43 +0200 Subject: gpio: generic: provide helpers for reading and writing registers Provide helpers wrapping the read_reg() and write_reg() callbacks of the generic GPIO API that are called directly by many users. This is done to hide their implementation ahead of moving them into the separate generic GPIO struct. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250825-gpio-mmio-gpio-conv-v1-2-356b4b1d5110@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index 5a85ecbef8d2..4c0626b53ec9 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -100,6 +100,37 @@ gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, return chip->gc.set(&chip->gc, offset, value); } +/** + * gpio_generic_read_reg() - Read a register using the underlying callback. + * @chip: Generic GPIO chip to use. + * @reg: Register to read. + * + * Returns: value read from register. + */ +static inline unsigned long +gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) +{ + if (WARN_ON(!chip->gc.read_reg)) + return 0; + + return chip->gc.read_reg(reg); +} + +/** + * gpio_generic_write_reg() - Write a register using the underlying callback. + * @chip: Generic GPIO chip to use. + * @reg: Register to write to. + * @val: New value to write. + */ +static inline void gpio_generic_write_reg(struct gpio_generic_chip *chip, + void __iomem *reg, unsigned long val) +{ + if (WARN_ON(!chip->gc.write_reg)) + return; + + chip->gc.write_reg(reg, val); +} + #define gpio_generic_chip_lock(gen_gc) \ raw_spin_lock(&(gen_gc)->gc.bgpio_lock) -- cgit v1.2.3 From 60ad9a07319283e6e1094cef3e972e754315c024 Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Wed, 20 Aug 2025 08:47:55 +0800 Subject: iio: core: switch info_mask fields to unsigned long to match find_bit helpers for_each_set_bit()/find_*_bit() expect arrays of unsigned long (see include/linux/find.h), but industrialio-core passed const long * into iio_device_add_info_mask_type{,_avail}(). These masks are used purely as bit arrays and are populated via BIT() (1UL << n). Switch the info_mask_* fields and the corresponding function parameters to unsigned long so the types match the helpers. This removes sparse warnings about signedness mismatches (seen with 'make C=1' CF='-Wsparse-all') without changing behavior or struct layout. No functional change intended. Suggested-by: Jonathan Cameron Signed-off-by: Junjie Cao Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250820004755.69627-1-junjie.cao@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 ++-- include/linux/iio/iio.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index eb6a54f8115d..38848d753a33 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1244,7 +1244,7 @@ static int iio_device_add_channel_label(struct iio_dev *indio_dev, static int iio_device_add_info_mask_type(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_shared_by shared_by, - const long *infomask) + const unsigned long *infomask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret, attrcount = 0; @@ -1274,7 +1274,7 @@ static int iio_device_add_info_mask_type(struct iio_dev *indio_dev, static int iio_device_add_info_mask_type_avail(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_shared_by shared_by, - const long *infomask) + const unsigned long *infomask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret, attrcount = 0; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2f5560646ee4..872ebdf0dd77 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -271,14 +271,14 @@ struct iio_chan_spec { unsigned int num_ext_scan_type; }; }; - long info_mask_separate; - long info_mask_separate_available; - long info_mask_shared_by_type; - long info_mask_shared_by_type_available; - long info_mask_shared_by_dir; - long info_mask_shared_by_dir_available; - long info_mask_shared_by_all; - long info_mask_shared_by_all_available; + unsigned long info_mask_separate; + unsigned long info_mask_separate_available; + unsigned long info_mask_shared_by_type; + unsigned long info_mask_shared_by_type_available; + unsigned long info_mask_shared_by_dir; + unsigned long info_mask_shared_by_dir_available; + unsigned long info_mask_shared_by_all; + unsigned long info_mask_shared_by_all_available; const struct iio_event_spec *event_spec; unsigned int num_event_specs; const struct iio_chan_spec_ext_info *ext_info; -- cgit v1.2.3 From 7a6fc1634cea6f220228a69b1c0210e6b8b1aaf0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:45 -0700 Subject: blk-mq-dma: create blk_map_iter type The req_iterator happens to have a similar fields to what the dma iterator needs, but we're not necessarily iterating a request's bi_io_vec. Create a new type that can be amended for additional future use. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-2-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq-dma.c | 4 ++-- include/linux/blk-mq-dma.h | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c index ad283017caef..51e7a0ff045f 100644 --- a/block/blk-mq-dma.c +++ b/block/blk-mq-dma.c @@ -10,7 +10,7 @@ struct phys_vec { u32 len; }; -static bool blk_map_iter_next(struct request *req, struct req_iterator *iter, +static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter, struct phys_vec *vec) { unsigned int max_size; @@ -246,7 +246,7 @@ blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist) int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, struct scatterlist **last_sg) { - struct req_iterator iter = { + struct blk_map_iter iter = { .bio = rq->bio, }; struct phys_vec vec; diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index c26a01aeae00..6a7e3828673d 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -5,6 +5,11 @@ #include #include +struct blk_map_iter { + struct bvec_iter iter; + struct bio *bio; +}; + struct blk_dma_iter { /* Output address range for this iteration */ dma_addr_t addr; @@ -14,7 +19,7 @@ struct blk_dma_iter { blk_status_t status; /* Internal to blk_rq_dma_map_iter_* */ - struct req_iterator iter; + struct blk_map_iter iter; struct pci_p2pdma_map_state p2pdma; }; -- cgit v1.2.3 From dae75dead2359edd7c55e1964e0edf7d03535b31 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:46 -0700 Subject: blk-mq-dma: provide the bio_vec array being iterated This will make it easier to add different sources of the bvec array, like for upcoming integrity support, rather than assume to use the bio's bi_io_vec. It also makes iterating "special" payloads more in common with iterating normal payloads. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-3-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq-dma.c | 56 +++++++++++++++++++++++++++------------------- include/linux/blk-mq-dma.h | 1 + 2 files changed, 34 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c index 51e7a0ff045f..8f41fe740b46 100644 --- a/block/blk-mq-dma.c +++ b/block/blk-mq-dma.c @@ -16,23 +16,14 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter, unsigned int max_size; struct bio_vec bv; - if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - if (!iter->bio) - return false; - vec->paddr = bvec_phys(&req->special_vec); - vec->len = req->special_vec.bv_len; - iter->bio = NULL; - return true; - } - if (!iter->iter.bi_size) return false; - bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter); + bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter); vec->paddr = bvec_phys(&bv); max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX); bv.bv_len = min(bv.bv_len, max_size); - bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len); + bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len); /* * If we are entirely done with this bi_io_vec entry, check if the next @@ -43,19 +34,20 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter, struct bio_vec next; if (!iter->iter.bi_size) { - if (!iter->bio->bi_next) + if (!iter->bio || !iter->bio->bi_next) break; iter->bio = iter->bio->bi_next; iter->iter = iter->bio->bi_iter; + iter->bvecs = iter->bio->bi_io_vec; } - next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter); + next = mp_bvec_iter_bvec(iter->bvecs, iter->iter); if (bv.bv_len + next.bv_len > max_size || !biovec_phys_mergeable(req->q, &bv, &next)) break; bv.bv_len += next.bv_len; - bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len); + bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len); } vec->len = bv.bv_len; @@ -125,6 +117,30 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev, return true; } +static inline void blk_rq_map_iter_init(struct request *rq, + struct blk_map_iter *iter) +{ + struct bio *bio = rq->bio; + + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) { + *iter = (struct blk_map_iter) { + .bvecs = &rq->special_vec, + .iter = { + .bi_size = rq->special_vec.bv_len, + } + }; + } else if (bio) { + *iter = (struct blk_map_iter) { + .bio = bio, + .bvecs = bio->bi_io_vec, + .iter = bio->bi_iter, + }; + } else { + /* the internal flush request may not have bio attached */ + *iter = (struct blk_map_iter) {}; + } +} + /** * blk_rq_dma_map_iter_start - map the first DMA segment for a request * @req: request to map @@ -153,8 +169,7 @@ bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev, unsigned int total_len = blk_rq_payload_bytes(req); struct phys_vec vec; - iter->iter.bio = req->bio; - iter->iter.iter = req->bio->bi_iter; + blk_rq_map_iter_init(req, &iter->iter); memset(&iter->p2pdma, 0, sizeof(iter->p2pdma)); iter->status = BLK_STS_OK; @@ -246,16 +261,11 @@ blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist) int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, struct scatterlist **last_sg) { - struct blk_map_iter iter = { - .bio = rq->bio, - }; + struct blk_map_iter iter; struct phys_vec vec; int nsegs = 0; - /* the internal flush request may not have bio attached */ - if (iter.bio) - iter.iter = iter.bio->bi_iter; - + blk_rq_map_iter_init(rq, &iter); while (blk_map_iter_next(rq, &iter, &vec)) { *last_sg = blk_next_sg(last_sg, sglist); sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len, diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 6a7e3828673d..e5cb5e46fc92 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -8,6 +8,7 @@ struct blk_map_iter { struct bvec_iter iter; struct bio *bio; + struct bio_vec *bvecs; }; struct blk_dma_iter { -- cgit v1.2.3 From 92fb75fd14b041038e30bc725ab4c1e625243573 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:47 -0700 Subject: blk-mq-dma: require unmap caller provide p2p map type In preparing for integrity dma mappings, we can't rely on the request flag because data and metadata may have different mapping types. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-4-kbusch@meta.com Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 9 ++++++++- include/linux/blk-mq-dma.h | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2c6d9506b172..111b6bc6c93e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -261,6 +261,9 @@ enum nvme_iod_flags { /* single segment dma mapping */ IOD_SINGLE_SEGMENT = 1U << 2, + + /* DMA mapped with PCI_P2PDMA_MAP_BUS_ADDR */ + IOD_P2P_BUS_ADDR = 1U << 3, }; struct nvme_dma_vec { @@ -725,7 +728,8 @@ static void nvme_unmap_data(struct request *req) return; } - if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) { + if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len, + iod->flags & IOD_P2P_BUS_ADDR)) { if (nvme_pci_cmd_use_sgl(&iod->cmd)) nvme_free_sgls(req); else @@ -1000,6 +1004,9 @@ static blk_status_t nvme_map_data(struct request *req) if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter)) return iter.status; + if (iter.p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR) + iod->flags |= IOD_P2P_BUS_ADDR; + if (use_sgl == SGL_FORCED || (use_sgl == SGL_SUPPORTED && (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold))) diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index e5cb5e46fc92..881880095e0d 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -47,14 +47,15 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) * @dma_dev: device to unmap from * @state: DMA IOVA state * @mapped_len: number of bytes to unmap + * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR * * Returns %false if the callers need to manually unmap every DMA segment * mapped using @iter or %true if no work is left to be done. */ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, - struct dma_iova_state *state, size_t mapped_len) + struct dma_iova_state *state, size_t mapped_len, bool is_p2p) { - if (req->cmd_flags & REQ_P2PDMA) + if (is_p2p) return true; if (dma_use_iova(state)) { -- cgit v1.2.3 From 7092639031a1bd5320ab827e8f665350f332b7ce Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:48 -0700 Subject: blk-mq: remove REQ_P2PDMA flag It's not serving any particular purpose. pci_p2pdma_state() already has all the appropriate checks, so the config and flag checks are not guarding anything. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-5-kbusch@meta.com Signed-off-by: Jens Axboe --- block/bio.c | 2 +- block/blk-mq-dma.c | 30 ++++++++++++++---------------- include/linux/blk_types.h | 2 -- 3 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 3b371a5da159..44c43b970387 100644 --- a/block/bio.c +++ b/block/bio.c @@ -981,7 +981,7 @@ void __bio_add_page(struct bio *bio, struct page *page, WARN_ON_ONCE(bio_full(bio, len)); if (is_pci_p2pdma_page(page)) - bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE; + bio->bi_opf |= REQ_NOMERGE; bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off); bio->bi_iter.bi_size += len; diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c index 8f41fe740b46..58defab21882 100644 --- a/block/blk-mq-dma.c +++ b/block/blk-mq-dma.c @@ -180,22 +180,20 @@ bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev, if (!blk_map_iter_next(req, &iter->iter, &vec)) return false; - if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) { - switch (pci_p2pdma_state(&iter->p2pdma, dma_dev, - phys_to_page(vec.paddr))) { - case PCI_P2PDMA_MAP_BUS_ADDR: - return blk_dma_map_bus(iter, &vec); - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * P2P transfers through the host bridge are treated the - * same as non-P2P transfers below and during unmap. - */ - req->cmd_flags &= ~REQ_P2PDMA; - break; - default: - iter->status = BLK_STS_INVAL; - return false; - } + switch (pci_p2pdma_state(&iter->p2pdma, dma_dev, + phys_to_page(vec.paddr))) { + case PCI_P2PDMA_MAP_BUS_ADDR: + return blk_dma_map_bus(iter, &vec); + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * P2P transfers through the host bridge are treated the + * same as non-P2P transfers below and during unmap. + */ + case PCI_P2PDMA_MAP_NONE: + break; + default: + iter->status = BLK_STS_INVAL; + return false; } if (blk_can_dma_map_iova(req, dma_dev) && diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 09b99d52fd36..930daff207df 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -386,7 +386,6 @@ enum req_flag_bits { __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ __REQ_ATOMIC, /* for atomic write operations */ - __REQ_P2PDMA, /* contains P2P DMA pages */ /* * Command specific flags, keep last: */ @@ -419,7 +418,6 @@ enum req_flag_bits { #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) #define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) -#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA) #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) -- cgit v1.2.3 From fec9b16dc5550191fd85af118271ea00e8dcc5f8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:50 -0700 Subject: blk-mq-dma: add scatter-less integrity data DMA mapping Similar to regular data, introduce more efficient integrity mapping helpers that does away with the scatterlist structure. This uses the block mapping iterator to add IOVA segments if IOMMU is enabled, or maps directly if not. This also supports P2P segements if integrity data ever wants to allocate that type of memory. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-7-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq-dma.c | 104 +++++++++++++++++++++++++++++++++++++++--- include/linux/blk-integrity.h | 17 +++++++ include/linux/blk-mq-dma.h | 1 + 3 files changed, 115 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c index 31dd8f58f081..60a244a129c3 100644 --- a/block/blk-mq-dma.c +++ b/block/blk-mq-dma.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2025 Christoph Hellwig */ +#include #include #include "blk.h" @@ -10,6 +11,24 @@ struct phys_vec { u32 len; }; +static bool __blk_map_iter_next(struct blk_map_iter *iter) +{ + if (iter->iter.bi_size) + return true; + if (!iter->bio || !iter->bio->bi_next) + return false; + + iter->bio = iter->bio->bi_next; + if (iter->is_integrity) { + iter->iter = bio_integrity(iter->bio)->bip_iter; + iter->bvecs = bio_integrity(iter->bio)->bip_vec; + } else { + iter->iter = iter->bio->bi_iter; + iter->bvecs = iter->bio->bi_io_vec; + } + return true; +} + static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter, struct phys_vec *vec) { @@ -33,13 +52,8 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter, while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) { struct bio_vec next; - if (!iter->iter.bi_size) { - if (!iter->bio || !iter->bio->bi_next) - break; - iter->bio = iter->bio->bi_next; - iter->iter = iter->bio->bi_iter; - iter->bvecs = iter->bio->bi_io_vec; - } + if (!__blk_map_iter_next(iter)) + break; next = mp_bvec_iter_bvec(iter->bvecs, iter->iter); if (bv.bv_len + next.bv_len > max_size || @@ -290,3 +304,79 @@ int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, return nsegs; } EXPORT_SYMBOL(__blk_rq_map_sg); + +#ifdef CONFIG_BLK_DEV_INTEGRITY +/** + * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment + * for a request + * @req: request to map + * @dma_dev: device to map to + * @state: DMA IOVA state + * @iter: block layer DMA iterator + * + * Start DMA mapping @req integrity data to @dma_dev. @state and @iter are + * provided by the caller and don't need to be initialized. @state needs to be + * stored for use at unmap time, @iter is only needed at map time. + * + * Returns %false if there is no segment to map, including due to an error, or + * %true if it did map a segment. + * + * If a segment was mapped, the DMA address for it is returned in @iter.addr + * and the length in @iter.len. If no segment was mapped the status code is + * returned in @iter.status. + * + * The caller can call blk_rq_dma_map_coalesce() to check if further segments + * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next() + * to try to map the following segments. + */ +bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter) +{ + unsigned len = bio_integrity_bytes(&req->q->limits.integrity, + blk_rq_sectors(req)); + struct bio *bio = req->bio; + + iter->iter = (struct blk_map_iter) { + .bio = bio, + .iter = bio_integrity(bio)->bip_iter, + .bvecs = bio_integrity(bio)->bip_vec, + .is_integrity = true, + }; + return blk_dma_map_iter_start(req, dma_dev, state, iter, len); +} +EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start); + +/** + * blk_rq_integrity_dma_map_iter_start - map the next integrity DMA segment for + * a request + * @req: request to map + * @dma_dev: device to map to + * @state: DMA IOVA state + * @iter: block layer DMA iterator + * + * Iterate to the next integrity mapping after a previous call to + * blk_rq_integrity_dma_map_iter_start(). See there for a detailed description + * of the arguments. + * + * Returns %false if there is no segment to map, including due to an error, or + * %true if it did map a segment. + * + * If a segment was mapped, the DMA address for it is returned in @iter.addr and + * the length in @iter.len. If no segment was mapped the status code is + * returned in @iter.status. + */ +bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter) +{ + struct phys_vec vec; + + if (!blk_map_iter_next(req, &iter->iter, &vec)) + return false; + + if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR) + return blk_dma_map_bus(iter, &vec); + return blk_dma_map_direct(req, dma_dev, iter, &vec); +} +EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next); +#endif diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index e67a2b6e8f11..78fe2459e661 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -4,6 +4,7 @@ #include #include +#include struct request; @@ -31,6 +32,11 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, struct logical_block_metadata_cap __user *argp); +bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter); +bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -115,6 +121,17 @@ static inline int blk_rq_integrity_map_user(struct request *rq, { return -EINVAL; } +static inline bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter) +{ + return false; +} +static inline bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter) +{ + return false; +} static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 881880095e0d..0f45ea110ca1 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -9,6 +9,7 @@ struct blk_map_iter { struct bvec_iter iter; struct bio *bio; struct bio_vec *bvecs; + bool is_integrity; }; struct blk_dma_iter { -- cgit v1.2.3 From cbdd16b818eef876dd2de9d503fe7397a0666cbe Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Mon, 18 Aug 2025 19:49:34 +0800 Subject: HID: i2c-hid: Make elan touch controllers power on after panel is enabled Introduce a new HID quirk to indicate that this device has to be enabled after the panel's backlight is enabled, and update the driver data for the elan devices to enable this quirk. This cannot be a I2C HID quirk because the kernel needs to acknowledge this before powering up the device and read the VID/PID. When this quirk is enabled, register .panel_enabled()/.panel_disabling() instead for the panel follower. Also rename the *panel_prepare* functions into *panel_follower* because they could be called in other situations now. Fixes: bd3cba00dcc63 ("HID: i2c-hid: elan: Add support for Elan eKTH6915 i2c-hid touchscreens") Fixes: d06651bebf99e ("HID: i2c-hid: elan: Add elan-ekth6a12nay timing") Reviewed-by: Douglas Anderson Signed-off-by: Pin-yen Lin Acked-by: Jiri Kosina Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250818115015.2909525-2-treapking@chromium.org --- drivers/hid/i2c-hid/i2c-hid-core.c | 46 +++++++++++++++++++++-------------- drivers/hid/i2c-hid/i2c-hid-of-elan.c | 11 ++++++++- include/linux/hid.h | 2 ++ 3 files changed, 40 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index d3912e3f2f13..99ce6386176c 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -112,9 +112,9 @@ struct i2c_hid { struct i2chid_ops *ops; struct drm_panel_follower panel_follower; - struct work_struct panel_follower_prepare_work; + struct work_struct panel_follower_work; bool is_panel_follower; - bool prepare_work_finished; + bool panel_follower_work_finished; }; static const struct i2c_hid_quirks { @@ -1110,10 +1110,10 @@ err_power_down: return ret; } -static void ihid_core_panel_prepare_work(struct work_struct *work) +static void ihid_core_panel_follower_work(struct work_struct *work) { struct i2c_hid *ihid = container_of(work, struct i2c_hid, - panel_follower_prepare_work); + panel_follower_work); struct hid_device *hid = ihid->hid; int ret; @@ -1130,7 +1130,7 @@ static void ihid_core_panel_prepare_work(struct work_struct *work) if (ret) dev_warn(&ihid->client->dev, "Power on failed: %d\n", ret); else - WRITE_ONCE(ihid->prepare_work_finished, true); + WRITE_ONCE(ihid->panel_follower_work_finished, true); /* * The work APIs provide a number of memory ordering guarantees @@ -1139,12 +1139,12 @@ static void ihid_core_panel_prepare_work(struct work_struct *work) * guarantee that a write that happened in the work is visible after * cancel_work_sync(). We'll add a write memory barrier here to match * with i2c_hid_core_panel_unpreparing() to ensure that our write to - * prepare_work_finished is visible there. + * panel_follower_work_finished is visible there. */ smp_wmb(); } -static int i2c_hid_core_panel_prepared(struct drm_panel_follower *follower) +static int i2c_hid_core_panel_follower_resume(struct drm_panel_follower *follower) { struct i2c_hid *ihid = container_of(follower, struct i2c_hid, panel_follower); @@ -1152,29 +1152,36 @@ static int i2c_hid_core_panel_prepared(struct drm_panel_follower *follower) * Powering on a touchscreen can be a slow process. Queue the work to * the system workqueue so we don't block the panel's power up. */ - WRITE_ONCE(ihid->prepare_work_finished, false); - schedule_work(&ihid->panel_follower_prepare_work); + WRITE_ONCE(ihid->panel_follower_work_finished, false); + schedule_work(&ihid->panel_follower_work); return 0; } -static int i2c_hid_core_panel_unpreparing(struct drm_panel_follower *follower) +static int i2c_hid_core_panel_follower_suspend(struct drm_panel_follower *follower) { struct i2c_hid *ihid = container_of(follower, struct i2c_hid, panel_follower); - cancel_work_sync(&ihid->panel_follower_prepare_work); + cancel_work_sync(&ihid->panel_follower_work); - /* Match with ihid_core_panel_prepare_work() */ + /* Match with ihid_core_panel_follower_work() */ smp_rmb(); - if (!READ_ONCE(ihid->prepare_work_finished)) + if (!READ_ONCE(ihid->panel_follower_work_finished)) return 0; return i2c_hid_core_suspend(ihid, true); } -static const struct drm_panel_follower_funcs i2c_hid_core_panel_follower_funcs = { - .panel_prepared = i2c_hid_core_panel_prepared, - .panel_unpreparing = i2c_hid_core_panel_unpreparing, +static const struct drm_panel_follower_funcs + i2c_hid_core_panel_follower_prepare_funcs = { + .panel_prepared = i2c_hid_core_panel_follower_resume, + .panel_unpreparing = i2c_hid_core_panel_follower_suspend, +}; + +static const struct drm_panel_follower_funcs + i2c_hid_core_panel_follower_enable_funcs = { + .panel_enabled = i2c_hid_core_panel_follower_resume, + .panel_disabling = i2c_hid_core_panel_follower_suspend, }; static int i2c_hid_core_register_panel_follower(struct i2c_hid *ihid) @@ -1182,7 +1189,10 @@ static int i2c_hid_core_register_panel_follower(struct i2c_hid *ihid) struct device *dev = &ihid->client->dev; int ret; - ihid->panel_follower.funcs = &i2c_hid_core_panel_follower_funcs; + if (ihid->hid->initial_quirks | HID_QUIRK_POWER_ON_AFTER_BACKLIGHT) + ihid->panel_follower.funcs = &i2c_hid_core_panel_follower_enable_funcs; + else + ihid->panel_follower.funcs = &i2c_hid_core_panel_follower_prepare_funcs; /* * If we're not in control of our own power up/power down then we can't @@ -1237,7 +1247,7 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, init_waitqueue_head(&ihid->wait); mutex_init(&ihid->cmd_lock); mutex_init(&ihid->reset_lock); - INIT_WORK(&ihid->panel_follower_prepare_work, ihid_core_panel_prepare_work); + INIT_WORK(&ihid->panel_follower_work, ihid_core_panel_follower_work); /* we need to allocate the command buffer without knowing the maximum * size of the reports. Let's use HID_MIN_BUFFER_SIZE, then we do the diff --git a/drivers/hid/i2c-hid/i2c-hid-of-elan.c b/drivers/hid/i2c-hid/i2c-hid-of-elan.c index 3fcff6daa0d3..0215f217f6d8 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of-elan.c +++ b/drivers/hid/i2c-hid/i2c-hid-of-elan.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,7 @@ struct elan_i2c_hid_chip_data { unsigned int post_power_delay_ms; u16 hid_descriptor_address; const char *main_supply_name; + bool power_after_backlight; }; struct i2c_hid_of_elan { @@ -97,6 +99,7 @@ static int i2c_hid_of_elan_probe(struct i2c_client *client) { struct i2c_hid_of_elan *ihid_elan; int ret; + u32 quirks = 0; ihid_elan = devm_kzalloc(&client->dev, sizeof(*ihid_elan), GFP_KERNEL); if (!ihid_elan) @@ -131,8 +134,12 @@ static int i2c_hid_of_elan_probe(struct i2c_client *client) } } + if (ihid_elan->chip_data->power_after_backlight) + quirks = HID_QUIRK_POWER_ON_AFTER_BACKLIGHT; + ret = i2c_hid_core_probe(client, &ihid_elan->ops, - ihid_elan->chip_data->hid_descriptor_address, 0); + ihid_elan->chip_data->hid_descriptor_address, + quirks); if (ret) goto err_deassert_reset; @@ -150,6 +157,7 @@ static const struct elan_i2c_hid_chip_data elan_ekth6915_chip_data = { .post_gpio_reset_on_delay_ms = 300, .hid_descriptor_address = 0x0001, .main_supply_name = "vcc33", + .power_after_backlight = true, }; static const struct elan_i2c_hid_chip_data elan_ekth6a12nay_chip_data = { @@ -157,6 +165,7 @@ static const struct elan_i2c_hid_chip_data elan_ekth6a12nay_chip_data = { .post_gpio_reset_on_delay_ms = 300, .hid_descriptor_address = 0x0001, .main_supply_name = "vcc33", + .power_after_backlight = true, }; static const struct elan_i2c_hid_chip_data ilitek_ili9882t_chip_data = { diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..c32425b5d011 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -364,6 +364,7 @@ struct hid_item { * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER + * | @HID_QUIRK_POWER_ON_AFTER_BACKLIGHT * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: @@ -391,6 +392,7 @@ struct hid_item { #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) #define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) +#define HID_QUIRK_POWER_ON_AFTER_BACKLIGHT BIT(23) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) -- cgit v1.2.3 From a214365140cc3009f07d4e14a8b481fd3dc41d31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Jul 2025 15:15:28 +0300 Subject: rculist: move list_for_each_rcu() to where it belongs The list_for_each_rcu() relies on the rcu_dereference() API which is not provided by the list.h. At the same time list.h is a low-level basic header that must not have dependencies like RCU, besides the fact of the potential circular dependencies in some cases. With all that said, move RCU related API to the rculist.h where it belongs. Signed-off-by: Andy Shevchenko Reviewed-by: Simona Vetter Reviewed-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) Signed-off-by: "Paul E. McKenney" --- include/linux/list.h | 10 ---------- include/linux/rculist.h | 10 ++++++++++ kernel/cgroup/dmem.c | 1 + 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..e7bdad9b8618 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,16 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) -/** - * list_for_each_rcu - Iterate over a list in an RCU-safe fashion - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - !list_is_head(pos, (head)); \ - pos = rcu_dereference(pos->next)) - /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 1b11926ddd47..2abba7552605 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -42,6 +42,16 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) */ #define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) +/** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + /** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 10b63433f057..e12b946278b6 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -14,6 +14,7 @@ #include #include #include +#include #include struct dmem_cgroup_region { -- cgit v1.2.3 From f45fc18b6de04483643e8aa2ab97737abfe03d59 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Aug 2025 09:56:03 +0200 Subject: net: airoha: Add airoha_ppe_dev struct definition Introduce airoha_ppe_dev struct as container for PPE offload callbacks consumed by the MT76 driver during flowtable offload for traffic received by the wlan NIC and forwarded to the wired one. Add airoha_ppe_setup_tc_block_cb routine to PPE offload ops for MT76 driver. Rely on airoha_ppe_dev pointer in airoha_ppe_setup_tc_block_cb signature. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250823-airoha-en7581-wlan-rx-offload-v3-2-f78600ec3ed8@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 4 +- drivers/net/ethernet/airoha/airoha_eth.h | 4 +- drivers/net/ethernet/airoha/airoha_npu.c | 1 - drivers/net/ethernet/airoha/airoha_ppe.c | 67 +++++++++++++++++++++++++++++-- include/linux/soc/airoha/airoha_offload.h | 35 ++++++++++++++++ 5 files changed, 104 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index e6b802e3d844..5a04f90dd3de 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2599,13 +2599,15 @@ static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct net_device *dev = cb_priv; + struct airoha_gdm_port *port = netdev_priv(dev); + struct airoha_eth *eth = port->qdma->eth; if (!tc_can_offload(dev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_CLSFLOWER: - return airoha_ppe_setup_tc_block_cb(dev, type_data); + return airoha_ppe_setup_tc_block_cb(ð->ppe->dev, type_data); case TC_SETUP_CLSMATCHALL: return airoha_dev_tc_matchall(dev, type_data); default: diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 9f721e2b972f..9060b1d2814e 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #define AIROHA_MAX_NUM_GDM_PORTS 4 @@ -546,6 +547,7 @@ struct airoha_gdm_port { #define AIROHA_RXD4_FOE_ENTRY GENMASK(15, 0) struct airoha_ppe { + struct airoha_ppe_dev dev; struct airoha_eth *eth; void *foe; @@ -622,7 +624,7 @@ bool airoha_is_valid_gdm_port(struct airoha_eth *eth, void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, u16 hash); -int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data); +int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port); diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 1a6b191ae0b0..e1d131d6115c 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "airoha_eth.h" diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 36b45e98279a..03d9b1f24bb3 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -6,8 +6,9 @@ #include #include +#include +#include #include -#include #include #include @@ -1282,10 +1283,10 @@ error_npu_put: return err; } -int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data) +int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) { - struct airoha_gdm_port *port = netdev_priv(dev); - struct airoha_eth *eth = port->qdma->eth; + struct airoha_ppe *ppe = dev->priv; + struct airoha_eth *eth = ppe->eth; int err = 0; mutex_lock(&flow_offload_mutex); @@ -1338,6 +1339,61 @@ void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); } +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + struct platform_device *pdev; + struct device_node *np; + struct airoha_eth *eth; + + np = of_parse_phandle(dev->of_node, "airoha,eth", 0); + if (!np) + return ERR_PTR(-ENODEV); + + pdev = of_find_device_by_node(np); + if (!pdev) { + dev_err(dev, "cannot find device node %s\n", np->name); + of_node_put(np); + return ERR_PTR(-ENODEV); + } + of_node_put(np); + + if (!try_module_get(THIS_MODULE)) { + dev_err(dev, "failed to get the device driver module\n"); + goto error_pdev_put; + } + + eth = platform_get_drvdata(pdev); + if (!eth) + goto error_module_put; + + if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER)) { + dev_err(&pdev->dev, + "failed to create device link to consumer %s\n", + dev_name(dev)); + goto error_module_put; + } + + return ð->ppe->dev; + +error_module_put: + module_put(THIS_MODULE); +error_pdev_put: + platform_device_put(pdev); + + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL_GPL(airoha_ppe_get_dev); + +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ + struct airoha_ppe *ppe = dev->priv; + struct airoha_eth *eth = ppe->eth; + + module_put(THIS_MODULE); + put_device(eth->dev); +} +EXPORT_SYMBOL_GPL(airoha_ppe_put_dev); + int airoha_ppe_init(struct airoha_eth *eth) { struct airoha_ppe *ppe; @@ -1347,6 +1403,9 @@ int airoha_ppe_init(struct airoha_eth *eth) if (!ppe) return -ENOMEM; + ppe->dev.ops.setup_tc_block_cb = airoha_ppe_setup_tc_block_cb; + ppe->dev.priv = ppe; + foe_size = PPE_NUM_ENTRIES * sizeof(struct airoha_foe_entry); ppe->foe = dmam_alloc_coherent(eth->dev, foe_size, &ppe->foe_dma, GFP_KERNEL); diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 117c63c2448d..4b4b8b9e426d 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -9,6 +9,41 @@ #include #include +struct airoha_ppe_dev { + struct { + int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, + void *type_data); + } ops; + + void *priv; +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA) || IS_MODULE(CONFIG_NET_AIROHA)) +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev); +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev); + +static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return dev->ops.setup_tc_block_cb(dev, type_data); +} +#else +static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + return NULL; +} + +static inline void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ +} + +static inline int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return -EOPNOTSUPP; +} +#endif + #define NPU_NUM_CORES 8 #define NPU_NUM_IRQ 6 #define NPU_RX0_DESC_NUM 512 -- cgit v1.2.3 From a7cc1aa151e3a9c0314b995f06102f7763d3bd71 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Aug 2025 09:56:04 +0200 Subject: net: airoha: Introduce check_skb callback in ppe_dev ops Export airoha_ppe_check_skb routine in ppe_dev ops. check_skb callback will be used by the MT76 driver in order to offload the traffic received by the wlan NIC and forwarded to the ethernet one. Add rx_wlan parameter to airoha_ppe_check_skb routine signature. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250823-airoha-en7581-wlan-rx-offload-v3-3-f78600ec3ed8@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 3 ++- drivers/net/ethernet/airoha/airoha_eth.h | 8 ++------ drivers/net/ethernet/airoha/airoha_ppe.c | 25 ++++++++++++++----------- include/linux/soc/airoha/airoha_offload.h | 20 ++++++++++++++++++++ 4 files changed, 38 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 5a04f90dd3de..81ea01a652b9 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -698,7 +698,8 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1); if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) - airoha_ppe_check_skb(eth->ppe, q->skb, hash); + airoha_ppe_check_skb(ð->ppe->dev, q->skb, hash, + false); done++; napi_gro_receive(&q->napi, q->skb); diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 9060b1d2814e..77fd13d466dc 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -229,10 +229,6 @@ struct airoha_hw_stats { u64 rx_len[7]; }; -enum { - PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, -}; - enum { AIROHA_FOE_STATE_INVALID, AIROHA_FOE_STATE_UNBIND, @@ -622,8 +618,8 @@ static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); -void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, - u16 hash); +void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, + u16 hash, bool rx_wlan); int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 03d9b1f24bb3..78473527ff50 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -616,7 +616,7 @@ static bool airoha_ppe_foe_compare_entry(struct airoha_flow_table_entry *e, static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, struct airoha_foe_entry *e, - u32 hash) + u32 hash, bool rx_wlan) { struct airoha_foe_entry *hwe = ppe->foe + hash * sizeof(*hwe); u32 ts = airoha_ppe_get_timestamp(ppe); @@ -639,7 +639,8 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, goto unlock; } - airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); + if (!rx_wlan) + airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); if (hash < PPE_SRAM_NUM_ENTRIES) { dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); @@ -665,7 +666,7 @@ static void airoha_ppe_foe_remove_flow(struct airoha_ppe *ppe, e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE; e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE, AIROHA_FOE_STATE_INVALID); - airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash); + airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash, false); e->hash = 0xffff; } if (e->type == FLOW_TYPE_L2_SUBFLOW) { @@ -704,7 +705,7 @@ static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe, static int airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, struct airoha_flow_table_entry *e, - u32 hash) + u32 hash, bool rx_wlan) { u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP; struct airoha_foe_entry *hwe_p, hwe; @@ -745,14 +746,14 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, } hwe.bridge.data = e->data.bridge.data; - airoha_ppe_foe_commit_entry(ppe, &hwe, hash); + airoha_ppe_foe_commit_entry(ppe, &hwe, hash, rx_wlan); return 0; } static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, struct sk_buff *skb, - u32 hash) + u32 hash, bool rx_wlan) { struct airoha_flow_table_entry *e; struct airoha_foe_bridge br = {}; @@ -785,7 +786,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, if (!airoha_ppe_foe_compare_entry(e, hwe)) continue; - airoha_ppe_foe_commit_entry(ppe, &e->data, hash); + airoha_ppe_foe_commit_entry(ppe, &e->data, hash, rx_wlan); commit_done = true; e->hash = hash; } @@ -797,7 +798,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, e = rhashtable_lookup_fast(&ppe->l2_flows, &br, airoha_l2_flow_table_params); if (e) - airoha_ppe_foe_commit_subflow_entry(ppe, e, hash); + airoha_ppe_foe_commit_subflow_entry(ppe, e, hash, rx_wlan); unlock: spin_unlock_bh(&ppe_lock); } @@ -1301,9 +1302,10 @@ int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) return err; } -void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, - u16 hash) +void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, + u16 hash, bool rx_wlan) { + struct airoha_ppe *ppe = dev->priv; u16 now, diff; if (hash > PPE_HASH_MASK) @@ -1315,7 +1317,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, return; ppe->foe_check_time[hash] = now; - airoha_ppe_foe_insert_entry(ppe, skb, hash); + airoha_ppe_foe_insert_entry(ppe, skb, hash, rx_wlan); } void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) @@ -1404,6 +1406,7 @@ int airoha_ppe_init(struct airoha_eth *eth) return -ENOMEM; ppe->dev.ops.setup_tc_block_cb = airoha_ppe_setup_tc_block_cb; + ppe->dev.ops.check_skb = airoha_ppe_check_skb; ppe->dev.priv = ppe; foe_size = PPE_NUM_ENTRIES * sizeof(struct airoha_foe_entry); diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 4b4b8b9e426d..1dc5b4e35ef9 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -9,10 +9,17 @@ #include #include +enum { + PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, +}; + struct airoha_ppe_dev { struct { int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, void *type_data); + void (*check_skb)(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan); } ops; void *priv; @@ -27,6 +34,13 @@ static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, { return dev->ops.setup_tc_block_cb(dev, type_data); } + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, + u16 hash, bool rx_wlan) +{ + dev->ops.check_skb(dev, skb, hash, rx_wlan); +} #else static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) { @@ -42,6 +56,12 @@ static inline int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, { return -EOPNOTSUPP; } + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan) +{ +} #endif #define NPU_NUM_CORES 8 -- cgit v1.2.3 From f1241200cd66b3e25fd2a44dd961d9720e965aa1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:07:00 +0000 Subject: tcp: Don't pass hashinfo to inet_diag helpers. These inet_diag functions required struct inet_hashinfo because they are shared by TCP and DCCP: * inet_diag_dump_icsk() * inet_diag_dump_one_icsk() * inet_diag_find_one_icsk() DCCP has gone, and we don't need to pass hashinfo down to them. Let's fetch net->ipv4.tcp_death_row.hashinfo directly in the first 2 functions. Note that inet_diag_find_one_icsk() don't need hashinfo since the previous patch. We will move TCP-specific functions to tcp_diag.c in the next patch. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250822190803.540788-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 6 ++---- net/ipv4/inet_diag.c | 10 +++++----- net/ipv4/tcp_diag.c | 17 +++-------------- 3 files changed, 10 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index a9033696b0aa..34de992b5bd9 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -48,15 +48,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, +void inet_diag_dump_icsk(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct netlink_callback *cb, +int inet_diag_dump_one_icsk(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); struct sock *inet_diag_find_one_icsk(struct net *net, - struct inet_hashinfo *hashinfo, const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 462406948c84..5cbbb0695aff 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -519,7 +519,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, } struct sock *inet_diag_find_one_icsk(struct net *net, - struct inet_hashinfo *hashinfo, const struct inet_diag_req_v2 *req) { struct sock *sk; @@ -562,8 +561,7 @@ struct sock *inet_diag_find_one_icsk(struct net *net, } EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct netlink_callback *cb, +int inet_diag_dump_one_icsk(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { struct sk_buff *in_skb = cb->skb; @@ -573,7 +571,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sock *sk; int err; - sk = inet_diag_find_one_icsk(net, hashinfo, req); + sk = inet_diag_find_one_icsk(net, req); if (IS_ERR(sk)) return PTR_ERR(sk); @@ -1018,7 +1016,7 @@ static void twsk_build_assert(void) #endif } -void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, +void inet_diag_dump_icsk(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { @@ -1026,10 +1024,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct inet_diag_dump_data *cb_data = cb->data; struct net *net = sock_net(skb->sk); u32 idiag_states = r->idiag_states; + struct inet_hashinfo *hashinfo; int i, num, s_i, s_num; struct nlattr *bc; struct sock *sk; + hashinfo = net->ipv4.tcp_death_row.hashinfo; bc = cb_data->inet_diag_nla_bc; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 45e174b8cd22..7cd9d032efdd 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -180,21 +180,13 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - struct inet_hashinfo *hinfo; - - hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo; - - inet_diag_dump_icsk(hinfo, skb, cb, r); + inet_diag_dump_icsk(skb, cb, r); } static int tcp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - struct inet_hashinfo *hinfo; - - hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo; - - return inet_diag_dump_one_icsk(hinfo, cb, req); + return inet_diag_dump_one_icsk(cb, req); } #ifdef CONFIG_INET_DIAG_DESTROY @@ -202,13 +194,10 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); - struct inet_hashinfo *hinfo; struct sock *sk; int err; - hinfo = net->ipv4.tcp_death_row.hashinfo; - sk = inet_diag_find_one_icsk(net, hinfo, req); - + sk = inet_diag_find_one_icsk(net, req); if (IS_ERR(sk)) return PTR_ERR(sk); -- cgit v1.2.3 From 382a4d9cb6dc07643345e15c49738088a727d29b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:07:01 +0000 Subject: tcp: Move TCP-specific diag functions to tcp_diag.c. tcp_diag_dump() / tcp_diag_dump_one() is just a wrapper of inet_diag_dump_icsk() / inet_diag_dump_one_icsk(), respectively. Let's inline them in tcp_diag.c and move static callees as well. Note that inet_sk_attr_size() is merged into tcp_diag_get_aux_size(), and we remove inet_diag_handler.idiag_get_aux_size() accordingly. While at it, BUG_ON() is replaced with DEBUG_NET_WARN_ON_ONCE(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250822190803.540788-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 11 -- net/ipv4/inet_diag.c | 479 ---------------------------------------------- net/ipv4/tcp_diag.c | 460 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 455 insertions(+), 495 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 34de992b5bd9..30bf8f7ea62b 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,9 +24,6 @@ struct inet_diag_handler { bool net_admin, struct sk_buff *skb); - size_t (*idiag_get_aux_size)(struct sock *sk, - bool net_admin); - int (*destroy)(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req); @@ -48,14 +45,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req); - -struct sock *inet_diag_find_one_icsk(struct net *net, - const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5cbbb0695aff..9d4dcd17728c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -20,9 +20,6 @@ #include #include #include -#include -#include -#include #include #include @@ -97,31 +94,6 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) } EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); -static size_t inet_sk_attr_size(struct sock *sk, - const struct inet_diag_req_v2 *req, - bool net_admin) -{ - const struct inet_diag_handler *handler; - size_t aux = 0; - - rcu_read_lock(); - handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]); - DEBUG_NET_WARN_ON_ONCE(!handler); - if (handler && handler->idiag_get_aux_size) - aux = handler->idiag_get_aux_size(sk, net_admin); - rcu_read_unlock(); - - return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(sizeof(struct inet_diag_msg)) - + inet_diag_msg_attrs_size() - + nla_total_size(sizeof(struct inet_diag_meminfo)) - + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) - + nla_total_size(TCP_CA_NAME_MAX) - + nla_total_size(sizeof(struct tcpvegas_info)) - + aux - + 64; -} - int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, @@ -422,181 +394,6 @@ errout: } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); -static int inet_twsk_diag_fill(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - u16 nlmsg_flags, bool net_admin) -{ - struct inet_timewait_sock *tw = inet_twsk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, - sizeof(*r), nlmsg_flags); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - BUG_ON(tw->tw_state != TCP_TIME_WAIT); - - inet_diag_msg_common_fill(r, sk); - r->idiag_retrans = 0; - - r->idiag_state = READ_ONCE(tw->tw_substate); - r->idiag_timer = 3; - tmo = tw->tw_timer.expires - jiffies; - r->idiag_expires = jiffies_delta_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; - - if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - tw->tw_mark)) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - nlmsg_end(skb, nlh); - return 0; -} - -static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, - u16 nlmsg_flags, bool net_admin) -{ - struct request_sock *reqsk = inet_reqsk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - inet_diag_msg_common_fill(r, sk); - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = reqsk->num_retrans; - - BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != - offsetof(struct sock, sk_cookie)); - - tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; - r->idiag_expires = jiffies_delta_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; - - if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - nlmsg_end(skb, nlh); - return 0; -} - -static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - u16 nlmsg_flags, bool net_admin) -{ - if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - - if (sk->sk_state == TCP_NEW_SYN_RECV) - return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - - return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, - net_admin); -} - -struct sock *inet_diag_find_one_icsk(struct net *net, - const struct inet_diag_req_v2 *req) -{ - struct sock *sk; - - rcu_read_lock(); - if (req->sdiag_family == AF_INET) - sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0], - req->id.idiag_dport, req->id.idiag_src[0], - req->id.idiag_sport, req->id.idiag_if); -#if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) { - if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && - ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) - sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3], - req->id.idiag_dport, req->id.idiag_src[3], - req->id.idiag_sport, req->id.idiag_if); - else - sk = inet6_lookup(net, NULL, 0, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); - } -#endif - else { - rcu_read_unlock(); - return ERR_PTR(-EINVAL); - } - rcu_read_unlock(); - if (!sk) - return ERR_PTR(-ENOENT); - - if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { - sock_gen_put(sk); - return ERR_PTR(-ENOENT); - } - - return sk; -} -EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); - -int inet_diag_dump_one_icsk(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - struct sk_buff *in_skb = cb->skb; - bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; - struct sock *sk; - int err; - - sk = inet_diag_find_one_icsk(net, req); - if (IS_ERR(sk)) - return PTR_ERR(sk); - - rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL); - if (!rep) { - err = -ENOMEM; - goto out; - } - - err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); - if (err < 0) { - WARN_ON(err == -EMSGSIZE); - nlmsg_free(rep); - goto out; - } - err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); - -out: - if (sk) - sock_gen_put(sk); - - return err; -} -EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); - static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, int hdrlen, @@ -990,282 +787,6 @@ static int inet_diag_bc_audit(const struct nlattr *attr, return len == 0 ? 0 : -EINVAL; } -static void twsk_build_assert(void) -{ - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != - offsetof(struct sock, sk_family)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != - offsetof(struct inet_sock, inet_num)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != - offsetof(struct inet_sock, inet_dport)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != - offsetof(struct inet_sock, inet_rcv_saddr)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != - offsetof(struct inet_sock, inet_daddr)); - -#if IS_ENABLED(CONFIG_IPV6) - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != - offsetof(struct sock, sk_v6_rcv_saddr)); - - BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != - offsetof(struct sock, sk_v6_daddr)); -#endif -} - -void inet_diag_dump_icsk(struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); - struct inet_diag_dump_data *cb_data = cb->data; - struct net *net = sock_net(skb->sk); - u32 idiag_states = r->idiag_states; - struct inet_hashinfo *hashinfo; - int i, num, s_i, s_num; - struct nlattr *bc; - struct sock *sk; - - hashinfo = net->ipv4.tcp_death_row.hashinfo; - bc = cb_data->inet_diag_nla_bc; - if (idiag_states & TCPF_SYN_RECV) - idiag_states |= TCPF_NEW_SYN_RECV; - s_i = cb->args[1]; - s_num = num = cb->args[2]; - - if (cb->args[0] == 0) { - if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) - goto skip_listen_ht; - - for (i = s_i; i <= hashinfo->lhash2_mask; i++) { - struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; - - num = 0; - ilb = &hashinfo->lhash2[i]; - - if (hlist_nulls_empty(&ilb->nulls_head)) { - s_num = 0; - continue; - } - spin_lock(&ilb->lock); - sk_nulls_for_each(sk, node, &ilb->nulls_head) { - struct inet_sock *inet = inet_sk(sk); - - if (!net_eq(sock_net(sk), net)) - continue; - - if (num < s_num) { - num++; - continue; - } - - if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) - goto next_listen; - - if (r->id.idiag_sport != inet->inet_sport && - r->id.idiag_sport) - goto next_listen; - - if (!inet_diag_bc_sk(bc, sk)) - goto next_listen; - - if (inet_sk_diag_fill(sk, inet_csk(sk), skb, - cb, r, NLM_F_MULTI, - net_admin) < 0) { - spin_unlock(&ilb->lock); - goto done; - } - -next_listen: - ++num; - } - spin_unlock(&ilb->lock); - - s_num = 0; - } -skip_listen_ht: - cb->args[0] = 1; - s_i = num = s_num = 0; - } - -/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets - * with bh disabled. - */ -#define SKARR_SZ 16 - - /* Dump bound but inactive (not listening, connecting, etc.) sockets */ - if (cb->args[0] == 1) { - if (!(idiag_states & TCPF_BOUND_INACTIVE)) - goto skip_bind_ht; - - for (i = s_i; i < hashinfo->bhash_size; i++) { - struct inet_bind_hashbucket *ibb; - struct inet_bind2_bucket *tb2; - struct sock *sk_arr[SKARR_SZ]; - int num_arr[SKARR_SZ]; - int idx, accum, res; - -resume_bind_walk: - num = 0; - accum = 0; - ibb = &hashinfo->bhash2[i]; - - if (hlist_empty(&ibb->chain)) { - s_num = 0; - continue; - } - spin_lock_bh(&ibb->lock); - inet_bind_bucket_for_each(tb2, &ibb->chain) { - if (!net_eq(ib2_net(tb2), net)) - continue; - - sk_for_each_bound(sk, &tb2->owners) { - struct inet_sock *inet = inet_sk(sk); - - if (num < s_num) - goto next_bind; - - if (sk->sk_state != TCP_CLOSE || - !inet->inet_num) - goto next_bind; - - if (r->sdiag_family != AF_UNSPEC && - r->sdiag_family != sk->sk_family) - goto next_bind; - - if (!inet_diag_bc_sk(bc, sk)) - goto next_bind; - - sock_hold(sk); - num_arr[accum] = num; - sk_arr[accum] = sk; - if (++accum == SKARR_SZ) - goto pause_bind_walk; -next_bind: - num++; - } - } -pause_bind_walk: - spin_unlock_bh(&ibb->lock); - - res = 0; - for (idx = 0; idx < accum; idx++) { - if (res >= 0) { - res = inet_sk_diag_fill(sk_arr[idx], - NULL, skb, cb, - r, NLM_F_MULTI, - net_admin); - if (res < 0) - num = num_arr[idx]; - } - sock_put(sk_arr[idx]); - } - if (res < 0) - goto done; - - cond_resched(); - - if (accum == SKARR_SZ) { - s_num = num + 1; - goto resume_bind_walk; - } - - s_num = 0; - } -skip_bind_ht: - cb->args[0] = 2; - s_i = num = s_num = 0; - } - - if (!(idiag_states & ~TCPF_LISTEN)) - goto out; - - for (i = s_i; i <= hashinfo->ehash_mask; i++) { - struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - spinlock_t *lock = inet_ehash_lockp(hashinfo, i); - struct hlist_nulls_node *node; - struct sock *sk_arr[SKARR_SZ]; - int num_arr[SKARR_SZ]; - int idx, accum, res; - - if (hlist_nulls_empty(&head->chain)) - continue; - - if (i > s_i) - s_num = 0; - -next_chunk: - num = 0; - accum = 0; - spin_lock_bh(lock); - sk_nulls_for_each(sk, node, &head->chain) { - int state; - - if (!net_eq(sock_net(sk), net)) - continue; - if (num < s_num) - goto next_normal; - state = (sk->sk_state == TCP_TIME_WAIT) ? - READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state; - if (!(idiag_states & (1 << state))) - goto next_normal; - if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) - goto next_normal; - if (r->id.idiag_sport != htons(sk->sk_num) && - r->id.idiag_sport) - goto next_normal; - if (r->id.idiag_dport != sk->sk_dport && - r->id.idiag_dport) - goto next_normal; - twsk_build_assert(); - - if (!inet_diag_bc_sk(bc, sk)) - goto next_normal; - - if (!refcount_inc_not_zero(&sk->sk_refcnt)) - goto next_normal; - - num_arr[accum] = num; - sk_arr[accum] = sk; - if (++accum == SKARR_SZ) - break; -next_normal: - ++num; - } - spin_unlock_bh(lock); - res = 0; - for (idx = 0; idx < accum; idx++) { - if (res >= 0) { - res = sk_diag_fill(sk_arr[idx], skb, cb, r, - NLM_F_MULTI, net_admin); - if (res < 0) - num = num_arr[idx]; - } - sock_gen_put(sk_arr[idx]); - } - if (res < 0) - break; - cond_resched(); - if (accum == SKARR_SZ) { - s_num = num + 1; - goto next_chunk; - } - } - -done: - cb->args[1] = i; - cb->args[2] = num; -out: - ; -} -EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); - static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 7cd9d032efdd..2f3a779ce7a2 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -12,6 +12,9 @@ #include +#include +#include +#include #include #include @@ -174,19 +177,467 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) size += ulp_ops->get_info_size(sk, net_admin); } } - return size; + + return size + + nla_total_size(sizeof(struct tcp_info)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + + nla_total_size(TCP_CA_NAME_MAX) + + nla_total_size(sizeof(struct tcpvegas_info)) + + 64; +} + +static int tcp_twsk_diag_fill(struct sock *sk, + struct sk_buff *skb, + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) +{ + struct inet_timewait_sock *tw = inet_twsk(sk); + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, + sizeof(*r), nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + DEBUG_NET_WARN_ON_ONCE(tw->tw_state != TCP_TIME_WAIT); + + inet_diag_msg_common_fill(r, sk); + r->idiag_retrans = 0; + + r->idiag_state = READ_ONCE(tw->tw_substate); + r->idiag_timer = 3; + tmo = tw->tw_timer.expires - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; + + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + tw->tw_mark)) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) +{ + struct request_sock *reqsk = inet_reqsk(sk); + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = reqsk->num_retrans; + + BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != + offsetof(struct sock, sk_cookie)); + + tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; + + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + u16 nlmsg_flags, bool net_admin) +{ + if (sk->sk_state == TCP_TIME_WAIT) + return tcp_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); + + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); + + return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, + net_admin); +} + +static void twsk_build_assert(void) +{ + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != + offsetof(struct sock, sk_family)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != + offsetof(struct inet_sock, inet_num)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != + offsetof(struct inet_sock, inet_dport)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != + offsetof(struct inet_sock, inet_rcv_saddr)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != + offsetof(struct inet_sock, inet_daddr)); + +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != + offsetof(struct sock, sk_v6_rcv_saddr)); + + BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != + offsetof(struct sock, sk_v6_daddr)); +#endif } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(skb, cb, r); + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct inet_diag_dump_data *cb_data = cb->data; + struct net *net = sock_net(skb->sk); + u32 idiag_states = r->idiag_states; + struct inet_hashinfo *hashinfo; + int i, num, s_i, s_num; + struct nlattr *bc; + struct sock *sk; + + hashinfo = net->ipv4.tcp_death_row.hashinfo; + bc = cb_data->inet_diag_nla_bc; + if (idiag_states & TCPF_SYN_RECV) + idiag_states |= TCPF_NEW_SYN_RECV; + s_i = cb->args[1]; + s_num = num = cb->args[2]; + + if (cb->args[0] == 0) { + if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) + goto skip_listen_ht; + + for (i = s_i; i <= hashinfo->lhash2_mask; i++) { + struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; + + num = 0; + ilb = &hashinfo->lhash2[i]; + + if (hlist_nulls_empty(&ilb->nulls_head)) { + s_num = 0; + continue; + } + spin_lock(&ilb->lock); + sk_nulls_for_each(sk, node, &ilb->nulls_head) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) + continue; + + if (num < s_num) { + num++; + continue; + } + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next_listen; + + if (!inet_diag_bc_sk(bc, sk)) + goto next_listen; + + if (inet_sk_diag_fill(sk, inet_csk(sk), skb, + cb, r, NLM_F_MULTI, + net_admin) < 0) { + spin_unlock(&ilb->lock); + goto done; + } + +next_listen: + ++num; + } + spin_unlock(&ilb->lock); + + s_num = 0; + } +skip_listen_ht: + cb->args[0] = 1; + s_i = num = s_num = 0; + } + +/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets + * with bh disabled. + */ +#define SKARR_SZ 16 + + /* Dump bound but inactive (not listening, connecting, etc.) sockets */ + if (cb->args[0] == 1) { + if (!(idiag_states & TCPF_BOUND_INACTIVE)) + goto skip_bind_ht; + + for (i = s_i; i < hashinfo->bhash_size; i++) { + struct inet_bind_hashbucket *ibb; + struct inet_bind2_bucket *tb2; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; + +resume_bind_walk: + num = 0; + accum = 0; + ibb = &hashinfo->bhash2[i]; + + if (hlist_empty(&ibb->chain)) { + s_num = 0; + continue; + } + spin_lock_bh(&ibb->lock); + inet_bind_bucket_for_each(tb2, &ibb->chain) { + if (!net_eq(ib2_net(tb2), net)) + continue; + + sk_for_each_bound(sk, &tb2->owners) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next_bind; + + if (sk->sk_state != TCP_CLOSE || + !inet->inet_num) + goto next_bind; + + if (r->sdiag_family != AF_UNSPEC && + r->sdiag_family != sk->sk_family) + goto next_bind; + + if (!inet_diag_bc_sk(bc, sk)) + goto next_bind; + + sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + goto pause_bind_walk; +next_bind: + num++; + } + } +pause_bind_walk: + spin_unlock_bh(&ibb->lock); + + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = inet_sk_diag_fill(sk_arr[idx], + NULL, skb, cb, + r, NLM_F_MULTI, + net_admin); + if (res < 0) + num = num_arr[idx]; + } + sock_put(sk_arr[idx]); + } + if (res < 0) + goto done; + + cond_resched(); + + if (accum == SKARR_SZ) { + s_num = num + 1; + goto resume_bind_walk; + } + + s_num = 0; + } +skip_bind_ht: + cb->args[0] = 2; + s_i = num = s_num = 0; + } + + if (!(idiag_states & ~TCPF_LISTEN)) + goto out; + + for (i = s_i; i <= hashinfo->ehash_mask; i++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[i]; + spinlock_t *lock = inet_ehash_lockp(hashinfo, i); + struct hlist_nulls_node *node; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; + + if (hlist_nulls_empty(&head->chain)) + continue; + + if (i > s_i) + s_num = 0; + +next_chunk: + num = 0; + accum = 0; + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &head->chain) { + int state; + + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next_normal; + state = (sk->sk_state == TCP_TIME_WAIT) ? + READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state; + if (!(idiag_states & (1 << state))) + goto next_normal; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_normal; + if (r->id.idiag_sport != htons(sk->sk_num) && + r->id.idiag_sport) + goto next_normal; + if (r->id.idiag_dport != sk->sk_dport && + r->id.idiag_dport) + goto next_normal; + twsk_build_assert(); + + if (!inet_diag_bc_sk(bc, sk)) + goto next_normal; + + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto next_normal; + + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, cb, r, + NLM_F_MULTI, net_admin); + if (res < 0) + num = num_arr[idx]; + } + sock_gen_put(sk_arr[idx]); + } + if (res < 0) + break; + + cond_resched(); + + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } + } + +done: + cb->args[1] = i; + cb->args[2] = num; +out: + ; +} + +static struct sock *tcp_diag_find_one_icsk(struct net *net, + const struct inet_diag_req_v2 *req) +{ + struct sock *sk; + + rcu_read_lock(); + if (req->sdiag_family == AF_INET) { + sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); +#if IS_ENABLED(CONFIG_IPV6) + } else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3], + req->id.idiag_dport, req->id.idiag_src[3], + req->id.idiag_sport, req->id.idiag_if); + else + sk = inet6_lookup(net, NULL, 0, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); +#endif + } else { + rcu_read_unlock(); + return ERR_PTR(-EINVAL); + } + rcu_read_unlock(); + if (!sk) + return ERR_PTR(-ENOENT); + + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; } static int tcp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return inet_diag_dump_one_icsk(cb, req); + struct sk_buff *in_skb = cb->skb; + struct sk_buff *rep; + struct sock *sk; + struct net *net; + bool net_admin; + int err; + + net = sock_net(in_skb->sk); + sk = tcp_diag_find_one_icsk(net, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); + rep = nlmsg_new(tcp_diag_get_aux_size(sk, net_admin), GFP_KERNEL); + if (!rep) { + err = -ENOMEM; + goto out; + } + + err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + nlmsg_free(rep); + goto out; + } + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + +out: + if (sk) + sock_gen_put(sk); + + return err; } #ifdef CONFIG_INET_DIAG_DESTROY @@ -197,7 +648,7 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, struct sock *sk; int err; - sk = inet_diag_find_one_icsk(net, req); + sk = tcp_diag_find_one_icsk(net, req); if (IS_ERR(sk)) return PTR_ERR(sk); @@ -215,7 +666,6 @@ static const struct inet_diag_handler tcp_diag_handler = { .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_get_aux = tcp_diag_get_aux, - .idiag_get_aux_size = tcp_diag_get_aux_size, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), #ifdef CONFIG_INET_DIAG_DESTROY -- cgit v1.2.3 From df534e757321ae6efe848a6a787098c22a390ac6 Mon Sep 17 00:00:00 2001 From: David Yang Date: Sun, 24 Aug 2025 09:30:03 +0800 Subject: net: phylink: remove stale an_enabled from doc state->an_enabled was removed by commit 4ee9b0dcf09f ("net: phylink: remove an_enabled") but is left in mac_config() doc, so clean it. Signed-off-by: David Yang Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250824013009.2443580-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 30659b615fca..9af0411761d7 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -320,9 +320,8 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * If in 802.3z mode, the link speed is fixed, dependent on the * @state->interface. Duplex and pause modes are negotiated via * the in-band configuration word. Advertised pause modes are set - * according to the @state->an_enabled and @state->advertising - * flags. Beware of MACs which only support full duplex at gigabit - * and higher speeds. + * according to @state->advertising. Beware of MACs which only + * support full duplex at gigabit and higher speeds. * * If in Cisco SGMII mode, the link speed and duplex mode are passed * in the serial bitstream 16-bit configuration word, and the MAC @@ -331,7 +330,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * responsible for reading the configuration word and configuring * itself accordingly. * - * Valid state members: interface, an_enabled, pause, advertising. + * Valid state members: interface, pause, advertising. * * Implementations are expected to update the MAC to reflect the * requested settings - i.o.w., if nothing has changed between two -- cgit v1.2.3 From 1b93c03fb319d72a1f5f4723abd5df15ce40f4e2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 21 Aug 2025 17:06:03 +0800 Subject: rcu: add rcu_read_lock_dont_migrate() migrate_disable() is called to disable migration in the kernel, and it is often used together with rcu_read_lock(). However, with PREEMPT_RCU disabled, it's unnecessary, as rcu_read_lock() will always disable preemption, which will also disable migration. Introduce rcu_read_lock_dont_migrate() and rcu_read_unlock_migrate(), which will do the migration enable and disable only when PREEMPT_RCU. Signed-off-by: Menglong Dong Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/r/20250821090609.42508-2-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/rcupdate.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..9691ca380a4f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -962,6 +962,20 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } +static __always_inline void rcu_read_lock_dont_migrate(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + migrate_disable(); + rcu_read_lock(); +} + +static inline void rcu_read_unlock_migrate(void) +{ + rcu_read_unlock(); + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + migrate_enable(); +} + /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. -- cgit v1.2.3 From 05db35963eef7a55f1782190185cb8ddb9d923b7 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Wed, 20 Aug 2025 13:58:47 +0530 Subject: OPP: Add support to find OPP for a set of keys Some clients, such as PCIe, may operate at the same clock frequency across different data rates by varying link width. In such cases, frequency alone is not sufficient to uniquely identify an OPP. To support these scenarios, introduce a new API dev_pm_opp_find_key_exact() that allows OPP lookup with different set of keys like freq, level & bandwidth. Signed-off-by: Krishna Chaitanya Chundru [ Viresh: Minor cleanups ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 30 +++++++++++++++ 2 files changed, 129 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index edbd60501cf0..bba4f7daff8c 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -476,6 +476,16 @@ static unsigned long _read_bw(struct dev_pm_opp *opp, int index) return opp->bandwidth[index].peak; } +static unsigned long _read_opp_key(struct dev_pm_opp *opp, int index, + struct dev_pm_opp_key *key) +{ + key->bw = opp->bandwidth ? opp->bandwidth[index].peak : 0; + key->freq = opp->rates[index]; + key->level = opp->level; + + return true; +} + /* Generic comparison helpers */ static bool _compare_exact(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp, unsigned long opp_key, unsigned long key) @@ -509,6 +519,22 @@ static bool _compare_floor(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp, return false; } +static bool _compare_opp_key_exact(struct dev_pm_opp **opp, + struct dev_pm_opp *temp_opp, struct dev_pm_opp_key *opp_key, + struct dev_pm_opp_key *key) +{ + bool level_match = (key->level == OPP_LEVEL_UNSET || opp_key->level == key->level); + bool freq_match = (key->freq == 0 || opp_key->freq == key->freq); + bool bw_match = (key->bw == 0 || opp_key->bw == key->bw); + + if (freq_match && level_match && bw_match) { + *opp = temp_opp; + return true; + } + + return false; +} + /* Generic key finding helpers */ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table, unsigned long *key, int index, bool available, @@ -541,6 +567,37 @@ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table, return opp; } +static struct dev_pm_opp *_opp_table_find_opp_key(struct opp_table *opp_table, + struct dev_pm_opp_key *key, bool available, + unsigned long (*read)(struct dev_pm_opp *opp, int index, + struct dev_pm_opp_key *key), + bool (*compare)(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp, + struct dev_pm_opp_key *opp_key, struct dev_pm_opp_key *key), + bool (*assert)(struct opp_table *opp_table, unsigned int index)) +{ + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + struct dev_pm_opp_key temp_key; + + /* Assert that the requirement is met */ + if (!assert(opp_table, 0)) + return ERR_PTR(-EINVAL); + + guard(mutex)(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available == available) { + read(temp_opp, 0, &temp_key); + if (compare(&opp, temp_opp, &temp_key, key)) { + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + } + + return opp; +} + static struct dev_pm_opp * _find_key(struct device *dev, unsigned long *key, int index, bool available, unsigned long (*read)(struct dev_pm_opp *opp, int index), @@ -632,6 +689,48 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); +/** + * dev_pm_opp_find_key_exact() - Search for an OPP with exact key set + * @dev: Device for which the OPP is being searched + * @key: OPP key set to match + * @available: true/false - match for available OPP + * + * Search for an exact match of the key set in the OPP table. + * + * Return: A matching opp on success, else ERR_PTR in case of error. + * Possible error values: + * EINVAL: for bad pointers + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * Note: 'available' is a modifier for the search. If 'available' == true, + * then the match is for exact matching key and is available in the stored + * OPP table. If false, the match is for exact key which is not available. + * + * This provides a mechanism to enable an OPP which is not available currently + * or the opposite as well. + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available) +{ + struct opp_table *opp_table __free(put_opp_table) = _find_opp_table(dev); + + if (IS_ERR(opp_table)) { + dev_err(dev, "%s: OPP table not found (%ld)\n", __func__, + PTR_ERR(opp_table)); + return ERR_CAST(opp_table); + } + + return _opp_table_find_opp_key(opp_table, key, available, + _read_opp_key, _compare_opp_key_exact, + assert_single_clk); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_key_exact); + /** * dev_pm_opp_find_freq_exact_indexed() - Search for an exact freq for the * clock corresponding to the index diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cf477beae4bb..789406d95e69 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -98,6 +98,25 @@ struct dev_pm_opp_data { unsigned long u_volt; }; +/** + * struct dev_pm_opp_key - Key used to identify OPP entries + * @freq: Frequency in Hz. Use 0 if frequency is not to be matched. + * @level: Performance level associated with the OPP entry. + * Use OPP_LEVEL_UNSET if level is not to be matched. + * @bw: Bandwidth associated with the OPP entry. + * Use 0 if bandwidth is not to be matched. + * + * This structure is used to uniquely identify an OPP entry based on + * frequency, performance level, and bandwidth. Each field can be + * selectively ignored during matching by setting it to its respective + * NOP value. + */ +struct dev_pm_opp_key { + unsigned long freq; + unsigned int level; + u32 bw; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -131,6 +150,10 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); +struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available); + struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available); @@ -289,6 +312,13 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available) -- cgit v1.2.3 From e649bcda25b5ae1a30a182cc450f928a0b282c93 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Aug 2025 14:03:39 -0400 Subject: perf: Remove get_perf_callchain() init_nr argument The 'init_nr' argument has double duty: it's used to initialize both the number of contexts and the number of stack entries. That's confusing and the callers always pass zero anyway. Hard code the zero. Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Namhyung Kim Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20250820180428.259565081@kernel.org --- include/linux/perf_event.h | 2 +- kernel/bpf/stackmap.c | 4 ++-- kernel/events/callchain.c | 12 ++++++------ kernel/events/core.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bfbf9ea53f25..fd1d91017b99 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1719,7 +1719,7 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 3615c06b7dfa..ec3a57a5fba1 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -314,7 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, if (max_depth > sysctl_perf_event_max_stack) max_depth = sysctl_perf_event_max_stack; - trace = get_perf_callchain(regs, 0, kernel, user, max_depth, + trace = get_perf_callchain(regs, kernel, user, max_depth, false, false); if (unlikely(!trace)) @@ -451,7 +451,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, else if (kernel && task) trace = get_callchain_entry_for_task(task, max_depth); else - trace = get_perf_callchain(regs, 0, kernel, user, max_depth, + trace = get_perf_callchain(regs, kernel, user, max_depth, crosstask, false); if (unlikely(!trace) || trace->nr < skip) { diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 6c83ad674d01..b0f5bd228cd8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,7 +217,7 @@ static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entr } struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark) { struct perf_callchain_entry *entry; @@ -228,11 +228,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, if (!entry) return NULL; - ctx.entry = entry; - ctx.max_stack = max_stack; - ctx.nr = entry->nr = init_nr; - ctx.contexts = 0; - ctx.contexts_maxed = false; + ctx.entry = entry; + ctx.max_stack = max_stack; + ctx.nr = entry->nr = 0; + ctx.contexts = 0; + ctx.contexts_maxed = false; if (kernel && !user_mode(regs)) { if (add_mark) diff --git a/kernel/events/core.c b/kernel/events/core.c index ea357044d780..bade8e0fced7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8210,7 +8210,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) if (!kernel && !user) return &__empty_callchain; - callchain = get_perf_callchain(regs, 0, kernel, user, + callchain = get_perf_callchain(regs, kernel, user, max_stack, crosstask, true); return callchain ?: &__empty_callchain; } -- cgit v1.2.3 From d6d05e2af796ca25094f80a73d8841505d54368b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Jul 2025 17:13:02 +0200 Subject: video: screen_info: Add pixel-format helper for linear framebuffers Add screen_info_pixel_format(), which converts a screen_info's information about the color format to struct pixel_format. The encoding within the screen_info structure is complex and therefore prone to errors. Later patches will convert callers to use the pixel format. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250714151513.309475-3-tzimmermann@suse.de --- drivers/video/screen_info_generic.c | 55 +++++++++++++++++++++++++++++++++++++ include/linux/screen_info.h | 2 ++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/screen_info_generic.c b/drivers/video/screen_info_generic.c index 900e9386eceb..763adbba71cb 100644 --- a/drivers/video/screen_info_generic.c +++ b/drivers/video/screen_info_generic.c @@ -5,6 +5,8 @@ #include #include +#include