From c8770bcf5cefa8cbfae21c07c4fe3428f5a9d42a Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Mon, 7 Mar 2016 10:38:18 +0800 Subject: ath9k: Allow platform override BTCoex pin Add new platform data to allow override BTCoex default pin. Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo --- include/linux/ath9k_platform.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 33eb274cd0e6..e66153d60bd5 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -31,6 +31,10 @@ struct ath9k_platform_data { u32 gpio_mask; u32 gpio_val; + u32 bt_active_pin; + u32 bt_priority_pin; + u32 wlan_active_pin; + bool endian_check; bool is_clk_25mhz; bool tx_gain_buffalo; -- cgit v1.2.3 From 08a33805518e7845486f88287e8aace6f8439391 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 9 Mar 2016 11:30:12 -0800 Subject: iio: core: implement iio_device_{claim|release}_direct_mode() It is often the case that the driver wants to be sure a device stays in direct mode while it is executing a task or series of tasks. To accomplish this today, the driver performs this sequence: 1) take the device state lock, 2) verify it is not in a buffered mode, 3) execute some tasks, and 4) release that lock. This patch introduces a pair of helper functions that simplify these steps and make it more semantically expressive. iio_device_claim_direct_mode() If the device is not in any buffered mode it is guaranteed to stay that way until iio_release_direct_mode() is called. iio_device_release_direct_mode() Release the claim. Device is no longer guaranteed to stay in direct mode. Signed-off-by: Alison Schofield Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/iio/iio.h | 2 ++ 2 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 70cb7eb0a75c..2e768bc99f05 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "iio_core.h" #include "iio_core_trigger.h" @@ -1375,6 +1376,44 @@ void devm_iio_device_unregister(struct device *dev, struct iio_dev *indio_dev) } EXPORT_SYMBOL_GPL(devm_iio_device_unregister); +/** + * iio_device_claim_direct_mode - Keep device in direct mode + * @indio_dev: the iio_dev associated with the device + * + * If the device is in direct mode it is guaranteed to stay + * that way until iio_device_release_direct_mode() is called. + * + * Use with iio_device_release_direct_mode() + * + * Returns: 0 on success, -EBUSY on failure + */ +int iio_device_claim_direct_mode(struct iio_dev *indio_dev) +{ + mutex_lock(&indio_dev->mlock); + + if (iio_buffer_enabled(indio_dev)) { + mutex_unlock(&indio_dev->mlock); + return -EBUSY; + } + return 0; +} +EXPORT_SYMBOL_GPL(iio_device_claim_direct_mode); + +/** + * iio_device_release_direct_mode - releases claim on direct mode + * @indio_dev: the iio_dev associated with the device + * + * Release the claim. Device is no longer guaranteed to stay + * in direct mode. + * + * Use with iio_device_claim_direct_mode() + */ +void iio_device_release_direct_mode(struct iio_dev *indio_dev) +{ + mutex_unlock(&indio_dev->mlock); +} +EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); + subsys_initcall(iio_init); module_exit(iio_exit); diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index b2b16772c651..0b2773ada0ba 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -527,6 +527,8 @@ void iio_device_unregister(struct iio_dev *indio_dev); int devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev); void devm_iio_device_unregister(struct device *dev, struct iio_dev *indio_dev); int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); +int iio_device_claim_direct_mode(struct iio_dev *indio_dev); +void iio_device_release_direct_mode(struct iio_dev *indio_dev); extern struct bus_type iio_bus_type; -- cgit v1.2.3 From d13d3a573be5535123beacd926be38e571097bc5 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 23 Mar 2016 11:24:47 +0000 Subject: regulator: add missing descriptions in regulator_desc Members csel_reg and csel_mask of the regulator_desc struct are missing descriptions for documentation. Adding them. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index cd271e89a7e6..01d26244a610 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -255,6 +255,8 @@ enum regulator_type { * * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_ * @vsel_mask: Mask for register bitfield used for selector + * @csel_reg: Register for TPS65218 LS3 current regulator + * @csel_mask: Mask for TPS65218 LS3 current regulator * @apply_reg: Register for initiate voltage change on the output when * using regulator_set_voltage_sel_regmap * @apply_bit: Register bitfield used for initiate voltage change on the -- cgit v1.2.3 From abf2f825d115397944cab91a20c937331d77e37c Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 23 Mar 2016 11:35:39 +0000 Subject: regulator: add missing description for set_over_current_protection Over current protection is missing descriptions for documentation. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 +++ include/linux/regulator/machine.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 01d26244a610..1392022fe509 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -93,6 +93,9 @@ struct regulator_linear_range { * @get_current_limit: Get the configured limit for a current-limited regulator. * @set_input_current_limit: Configure an input limit. * + * @set_over_current_protection: Support capability of automatically shutting + * down when detecting an over current event. + * * @set_active_discharge: Set active discharge enable/disable of regulators. * * @set_mode: Set the configured operating mode for the regulator. diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 5d627c83a630..ad3e5158e586 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -97,6 +97,7 @@ struct regulator_state { * @ramp_disable: Disable ramp delay when initialising or when setting voltage. * @soft_start: Enable soft start so that voltage ramps slowly. * @pull_down: Enable pull down when regulator is disabled. + * @over_current_protection: Auto disable on over current event. * * @input_uV: Input voltage for regulator when supplied by another regulator. * -- cgit v1.2.3 From 81f4c50607b423a59f8a1b03e1e8fc409a1dcd22 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:22:01 -0400 Subject: constify security_path_truncate() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- security/apparmor/lsm.c | 2 +- security/security.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cdee11cbcdf1..77c3bfdacf16 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1366,7 +1366,7 @@ union security_list_options { int (*path_rmdir)(struct path *dir, struct dentry *dentry); int (*path_mknod)(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); - int (*path_truncate)(struct path *path); + int (*path_truncate)(const struct path *path); int (*path_symlink)(struct path *dir, struct dentry *dentry, const char *old_name); int (*path_link)(struct dentry *old_dentry, struct path *new_dir, diff --git a/include/linux/security.h b/include/linux/security.h index 157f0cb1e4d2..be37ccab2286 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1447,7 +1447,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(struct path *dir, struct dentry *dentry); int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); -int security_path_truncate(struct path *path); +int security_path_truncate(const struct path *path); int security_path_symlink(struct path *dir, struct dentry *dentry, const char *old_name); int security_path_link(struct dentry *old_dentry, struct path *new_dir, @@ -1481,7 +1481,7 @@ static inline int security_path_mknod(struct path *dir, struct dentry *dentry, return 0; } -static inline int security_path_truncate(struct path *path) +static inline int security_path_truncate(const struct path *path) { return 0; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 9713037e5257..83e9c3c2cfc8 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -269,7 +269,7 @@ static int apparmor_path_mknod(struct path *dir, struct dentry *dentry, return common_perm_create(OP_MKNOD, dir, dentry, AA_MAY_CREATE, mode); } -static int apparmor_path_truncate(struct path *path) +static int apparmor_path_truncate(const struct path *path) { struct path_cond cond = { d_backing_inode(path->dentry)->i_uid, d_backing_inode(path->dentry)->i_mode diff --git a/security/security.c b/security/security.c index 3644b0344d29..23ffb6cc3974 100644 --- a/security/security.c +++ b/security/security.c @@ -478,7 +478,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, } EXPORT_SYMBOL(security_path_rename); -int security_path_truncate(struct path *path) +int security_path_truncate(const struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index cbf3df422c87..8573eee2b58e 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -150,7 +150,7 @@ static int tomoyo_inode_getattr(const struct path *path) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_truncate(struct path *path) +static int tomoyo_path_truncate(const struct path *path) { return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path, NULL); } -- cgit v1.2.3 From 7df818b2370a9aab5fc58a85b70b8af3d835affa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:24:09 -0400 Subject: constify vfs_truncate() Signed-off-by: Al Viro --- fs/open.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/open.c b/fs/open.c index 17cb6b1dab75..2f49fce5c952 100644 --- a/fs/open.c +++ b/fs/open.c @@ -65,7 +65,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return ret; } -long vfs_truncate(struct path *path, loff_t length) +long vfs_truncate(const struct path *path, loff_t length) { struct inode *inode; long error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..09a68517e952 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2253,7 +2253,7 @@ struct filename { const char iname[]; }; -extern long vfs_truncate(struct path *, loff_t); +extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, -- cgit v1.2.3 From 7fd25dac9ad3970bede16f2834daf9f9d779d1b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:44:41 -0400 Subject: constify chown_common/security_path_chown Signed-off-by: Al Viro --- fs/open.c | 2 +- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- security/apparmor/lsm.c | 2 +- security/security.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/open.c b/fs/open.c index 2f49fce5c952..651bf74745a2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -564,7 +564,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct path *path, uid_t user, gid_t group) +static int chown_common(const struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 77c3bfdacf16..84f76cbc6d06 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1375,7 +1375,7 @@ union security_list_options { struct path *new_dir, struct dentry *new_dentry); int (*path_chmod)(struct path *path, umode_t mode); - int (*path_chown)(struct path *path, kuid_t uid, kgid_t gid); + int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); int (*path_chroot)(struct path *path); #endif diff --git a/include/linux/security.h b/include/linux/security.h index be37ccab2286..f83ca920ed46 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1456,7 +1456,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry, unsigned int flags); int security_path_chmod(struct path *path, umode_t mode); -int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); +int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) @@ -1513,7 +1513,7 @@ static inline int security_path_chmod(struct path *path, umode_t mode) return 0; } -static inline int security_path_chown(struct path *path, kuid_t uid, kgid_t gid) +static inline int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { return 0; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 21dae6070bb9..3adbff987b77 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -342,7 +342,7 @@ static int apparmor_path_chmod(struct path *path, umode_t mode) return common_perm_mnt_dentry(OP_CHMOD, path->mnt, path->dentry, AA_MAY_CHMOD); } -static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid) +static int apparmor_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { struct path_cond cond = { d_backing_inode(path->dentry)->i_uid, d_backing_inode(path->dentry)->i_mode diff --git a/security/security.c b/security/security.c index 23ffb6cc3974..4a3e7e99abbb 100644 --- a/security/security.c +++ b/security/security.c @@ -492,7 +492,7 @@ int security_path_chmod(struct path *path, umode_t mode) return call_int_hook(path_chmod, 0, path, mode); } -int security_path_chown(struct path *path, kuid_t uid, kgid_t gid) +int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 8573eee2b58e..f0989ec978e1 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -366,7 +366,7 @@ static int tomoyo_path_chmod(struct path *path, umode_t mode) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_chown(struct path *path, kuid_t uid, kgid_t gid) +static int tomoyo_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { int error = 0; if (uid_valid(uid)) -- cgit v1.2.3 From 8a04c43b8741ebb40508d160cf87ca74b70941af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:52:53 -0400 Subject: constify security_sb_mount() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- security/security.c | 2 +- security/selinux/hooks.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84f76cbc6d06..47117751f4eb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1343,7 +1343,7 @@ union security_list_options { int (*sb_kern_mount)(struct super_block *sb, int flags, void *data); int (*sb_show_options)(struct seq_file *m, struct super_block *sb); int (*sb_statfs)(struct dentry *dentry); - int (*sb_mount)(const char *dev_name, struct path *path, + int (*sb_mount)(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int (*sb_umount)(struct vfsmount *mnt, int flags); int (*sb_pivotroot)(struct path *old_path, struct path *new_path); diff --git a/include/linux/security.h b/include/linux/security.h index f83ca920ed46..415a357efe4c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -222,7 +222,7 @@ int security_sb_remount(struct super_block *sb, void *data); int security_sb_kern_mount(struct super_block *sb, int flags, void *data); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); -int security_sb_mount(const char *dev_name, struct path *path, +int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int security_sb_umount(struct vfsmount *mnt, int flags); int security_sb_pivotroot(struct path *old_path, struct path *new_path); @@ -530,7 +530,7 @@ static inline int security_sb_statfs(struct dentry *dentry) return 0; } -static inline int security_sb_mount(const char *dev_name, struct path *path, +static inline int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { diff --git a/security/security.c b/security/security.c index 4a3e7e99abbb..fc567656b16f 100644 --- a/security/security.c +++ b/security/security.c @@ -302,7 +302,7 @@ int security_sb_statfs(struct dentry *dentry) return call_int_hook(sb_statfs, 0, dentry); } -int security_sb_mount(const char *dev_name, struct path *path, +int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { return call_int_hook(sb_mount, 0, dev_name, path, type, flags, data); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 912deee3f01e..e3aeacc13545 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2760,7 +2760,7 @@ static int selinux_sb_statfs(struct dentry *dentry) } static int selinux_mount(const char *dev_name, - struct path *path, + const struct path *path, const char *type, unsigned long flags, void *data) diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index f0989ec978e1..c1177f885247 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -401,7 +401,7 @@ static int tomoyo_path_chroot(struct path *path) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_sb_mount(const char *dev_name, struct path *path, +static int tomoyo_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { return tomoyo_mount_permission(dev_name, path, type, flags, data); -- cgit v1.2.3 From be01f9f28e66fa846f02196eb047c6bc445642db Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:56:23 -0400 Subject: constify chmod_common/security_path_chmod Signed-off-by: Al Viro --- fs/open.c | 2 +- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- security/apparmor/lsm.c | 2 +- security/security.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/open.c b/fs/open.c index 651bf74745a2..cfdf71a6704e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -499,7 +499,7 @@ out: return error; } -static int chmod_common(struct path *path, umode_t mode) +static int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 47117751f4eb..294fdfe902bf 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1374,7 +1374,7 @@ union security_list_options { int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); - int (*path_chmod)(struct path *path, umode_t mode); + int (*path_chmod)(const struct path *path, umode_t mode); int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); int (*path_chroot)(struct path *path); #endif diff --git a/include/linux/security.h b/include/linux/security.h index 415a357efe4c..d6593ee2d0a9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1455,7 +1455,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry, unsigned int flags); -int security_path_chmod(struct path *path, umode_t mode); +int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ @@ -1508,7 +1508,7 @@ static inline int security_path_rename(struct path *old_dir, return 0; } -static inline int security_path_chmod(struct path *path, umode_t mode) +static inline int security_path_chmod(const struct path *path, umode_t mode) { return 0; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 3adbff987b77..8d19615dcb73 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -334,7 +334,7 @@ static int apparmor_path_rename(struct path *old_dir, struct dentry *old_dentry, return error; } -static int apparmor_path_chmod(struct path *path, umode_t mode) +static int apparmor_path_chmod(const struct path *path, umode_t mode) { if (!mediated_filesystem(path->dentry)) return 0; diff --git a/security/security.c b/security/security.c index fc567656b16f..b333429fe718 100644 --- a/security/security.c +++ b/security/security.c @@ -485,7 +485,7 @@ int security_path_truncate(const struct path *path) return call_int_hook(path_truncate, 0, path); } -int security_path_chmod(struct path *path, umode_t mode) +int security_path_chmod(const struct path *path, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index c1177f885247..e48d0a4e4128 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -351,7 +351,7 @@ static int tomoyo_file_ioctl(struct file *file, unsigned int cmd, * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_chmod(struct path *path, umode_t mode) +static int tomoyo_path_chmod(const struct path *path, umode_t mode) { return tomoyo_path_number_perm(TOMOYO_TYPE_CHMOD, path, mode & S_IALLUGO); -- cgit v1.2.3 From 989f74e0500a1e136d369bb619adc22786ea5e68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:13:39 -0400 Subject: constify security_path_{unlink,rmdir} Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 4 ++-- include/linux/security.h | 8 ++++---- security/apparmor/lsm.c | 4 ++-- security/security.c | 4 ++-- security/tomoyo/tomoyo.c | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 294fdfe902bf..322912cc2da1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1360,10 +1360,10 @@ union security_list_options { #ifdef CONFIG_SECURITY_PATH - int (*path_unlink)(struct path *dir, struct dentry *dentry); + int (*path_unlink)(const struct path *dir, struct dentry *dentry); int (*path_mkdir)(struct path *dir, struct dentry *dentry, umode_t mode); - int (*path_rmdir)(struct path *dir, struct dentry *dentry); + int (*path_rmdir)(const struct path *dir, struct dentry *dentry); int (*path_mknod)(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int (*path_truncate)(const struct path *path); diff --git a/include/linux/security.h b/include/linux/security.h index d6593ee2d0a9..e292d8cb21d7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1442,9 +1442,9 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_SECURITY_PATH -int security_path_unlink(struct path *dir, struct dentry *dentry); +int security_path_unlink(const struct path *dir, struct dentry *dentry); int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); -int security_path_rmdir(struct path *dir, struct dentry *dentry); +int security_path_rmdir(const struct path *dir, struct dentry *dentry); int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int security_path_truncate(const struct path *path); @@ -1459,7 +1459,7 @@ int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ -static inline int security_path_unlink(struct path *dir, struct dentry *dentry) +static inline int security_path_unlink(const struct path *dir, struct dentry *dentry) { return 0; } @@ -1470,7 +1470,7 @@ static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, return 0; } -static inline int security_path_rmdir(struct path *dir, struct dentry *dentry) +static inline int security_path_rmdir(const struct path *dir, struct dentry *dentry) { return 0; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 4d2638f4676d..b760fe026b82 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -245,7 +245,7 @@ static int common_perm_create(int op, const struct path *dir, return common_perm_dir_dentry(op, dir, dentry, mask, &cond); } -static int apparmor_path_unlink(struct path *dir, struct dentry *dentry) +static int apparmor_path_unlink(const struct path *dir, struct dentry *dentry) { return common_perm_rm(OP_UNLINK, dir, dentry, AA_MAY_DELETE); } @@ -257,7 +257,7 @@ static int apparmor_path_mkdir(struct path *dir, struct dentry *dentry, S_IFDIR); } -static int apparmor_path_rmdir(struct path *dir, struct dentry *dentry) +static int apparmor_path_rmdir(const struct path *dir, struct dentry *dentry) { return common_perm_rm(OP_RMDIR, dir, dentry, AA_MAY_DELETE); } diff --git a/security/security.c b/security/security.c index b333429fe718..20f2070b3ace 100644 --- a/security/security.c +++ b/security/security.c @@ -427,14 +427,14 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode) } EXPORT_SYMBOL(security_path_mkdir); -int security_path_rmdir(struct path *dir, struct dentry *dentry) +int security_path_rmdir(const struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_rmdir, 0, dir, dentry); } -int security_path_unlink(struct path *dir, struct dentry *dentry) +int security_path_unlink(const struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index e48d0a4e4128..be5b1ae02f02 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -163,7 +163,7 @@ static int tomoyo_path_truncate(const struct path *path) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_unlink(struct path *parent, struct dentry *dentry) +static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) { struct path path = { parent->mnt, dentry }; return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL); @@ -194,7 +194,7 @@ static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry, * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_rmdir(struct path *parent, struct dentry *dentry) +static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) { struct path path = { parent->mnt, dentry }; return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL); -- cgit v1.2.3 From d360775217070ff0f4291e47d3f568f0fe0b7374 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:21:09 -0400 Subject: constify security_path_{mkdir,mknod,symlink} ... as well as unix_mknod() and may_o_create() Signed-off-by: Al Viro --- fs/namei.c | 2 +- include/linux/lsm_hooks.h | 6 +++--- include/linux/security.h | 12 ++++++------ net/unix/af_unix.c | 2 +- security/apparmor/lsm.c | 6 +++--- security/security.c | 6 +++--- security/tomoyo/tomoyo.c | 6 +++--- 7 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 794f81dce766..8c97544d6883 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2783,7 +2783,7 @@ static inline int open_to_namei_flags(int flag) return flag; } -static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) +static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode) { int error = security_path_mknod(dir, dentry, mode, 0); if (error) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 322912cc2da1..919fb4f98e4f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1361,13 +1361,13 @@ union security_list_options { #ifdef CONFIG_SECURITY_PATH int (*path_unlink)(const struct path *dir, struct dentry *dentry); - int (*path_mkdir)(struct path *dir, struct dentry *dentry, + int (*path_mkdir)(const struct path *dir, struct dentry *dentry, umode_t mode); int (*path_rmdir)(const struct path *dir, struct dentry *dentry); - int (*path_mknod)(struct path *dir, struct dentry *dentry, + int (*path_mknod)(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int (*path_truncate)(const struct path *path); - int (*path_symlink)(struct path *dir, struct dentry *dentry, + int (*path_symlink)(const struct path *dir, struct dentry *dentry, const char *old_name); int (*path_link)(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); diff --git a/include/linux/security.h b/include/linux/security.h index e292d8cb21d7..ccb8c2a170e3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1443,12 +1443,12 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_SECURITY_PATH int security_path_unlink(const struct path *dir, struct dentry *dentry); -int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); +int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(const struct path *dir, struct dentry *dentry); -int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, +int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int security_path_truncate(const struct path *path); -int security_path_symlink(struct path *dir, struct dentry *dentry, +int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); @@ -1464,7 +1464,7 @@ static inline int security_path_unlink(const struct path *dir, struct dentry *de return 0; } -static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, +static inline int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { return 0; @@ -1475,7 +1475,7 @@ static inline int security_path_rmdir(const struct path *dir, struct dentry *den return 0; } -static inline int security_path_mknod(struct path *dir, struct dentry *dentry, +static inline int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { return 0; @@ -1486,7 +1486,7 @@ static inline int security_path_truncate(const struct path *path) return 0; } -static inline int security_path_symlink(struct path *dir, struct dentry *dentry, +static inline int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { return 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8269da73e9e5..80aa6a3e6817 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -953,7 +953,7 @@ fail: return NULL; } -static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode, +static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode, struct path *res) { int err; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index b760fe026b82..7ae540565097 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -250,7 +250,7 @@ static int apparmor_path_unlink(const struct path *dir, struct dentry *dentry) return common_perm_rm(OP_UNLINK, dir, dentry, AA_MAY_DELETE); } -static int apparmor_path_mkdir(struct path *dir, struct dentry *dentry, +static int apparmor_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { return common_perm_create(OP_MKDIR, dir, dentry, AA_MAY_CREATE, @@ -262,7 +262,7 @@ static int apparmor_path_rmdir(const struct path *dir, struct dentry *dentry) return common_perm_rm(OP_RMDIR, dir, dentry, AA_MAY_DELETE); } -static int apparmor_path_mknod(struct path *dir, struct dentry *dentry, +static int apparmor_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { return common_perm_create(OP_MKNOD, dir, dentry, AA_MAY_CREATE, mode); @@ -273,7 +273,7 @@ static int apparmor_path_truncate(const struct path *path) return common_perm_path(OP_TRUNC, path, MAY_WRITE | AA_MAY_META_WRITE); } -static int apparmor_path_symlink(struct path *dir, struct dentry *dentry, +static int apparmor_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { return common_perm_create(OP_SYMLINK, dir, dentry, AA_MAY_CREATE, diff --git a/security/security.c b/security/security.c index 20f2070b3ace..7f62e2ed6a28 100644 --- a/security/security.c +++ b/security/security.c @@ -410,7 +410,7 @@ int security_old_inode_init_security(struct inode *inode, struct inode *dir, EXPORT_SYMBOL(security_old_inode_init_security); #ifdef CONFIG_SECURITY_PATH -int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, +int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) @@ -419,7 +419,7 @@ int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, } EXPORT_SYMBOL(security_path_mknod); -int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode) +int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; @@ -442,7 +442,7 @@ int security_path_unlink(const struct path *dir, struct dentry *dentry) } EXPORT_SYMBOL(security_path_unlink); -int security_path_symlink(struct path *dir, struct dentry *dentry, +int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index be5b1ae02f02..d44752562b9b 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -178,7 +178,7 @@ static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry, +static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, umode_t mode) { struct path path = { parent->mnt, dentry }; @@ -209,7 +209,7 @@ static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry, +static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry, const char *old_name) { struct path path = { parent->mnt, dentry }; @@ -226,7 +226,7 @@ static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry, * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry, +static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, umode_t mode, unsigned int dev) { struct path path = { parent->mnt, dentry }; -- cgit v1.2.3 From 3ccee46ab487d5b87d0621824efe2500b2857c58 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:27:45 -0400 Subject: constify security_path_{link,rename} Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 6 +++--- include/linux/security.h | 12 ++++++------ security/apparmor/lsm.c | 6 +++--- security/security.c | 6 +++--- security/tomoyo/tomoyo.c | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 919fb4f98e4f..52c2ac5f4855 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1369,10 +1369,10 @@ union security_list_options { int (*path_truncate)(const struct path *path); int (*path_symlink)(const struct path *dir, struct dentry *dentry, const char *old_name); - int (*path_link)(struct dentry *old_dentry, struct path *new_dir, + int (*path_link)(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry); - int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, + int (*path_rename)(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry); int (*path_chmod)(const struct path *path, umode_t mode); int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); diff --git a/include/linux/security.h b/include/linux/security.h index ccb8c2a170e3..82854115e36b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1450,10 +1450,10 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t m int security_path_truncate(const struct path *path); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); -int security_path_link(struct dentry *old_dentry, struct path *new_dir, +int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry); -int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry, +int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry, unsigned int flags); int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); @@ -1493,15 +1493,15 @@ static inline int security_path_symlink(const struct path *dir, struct dentry *d } static inline int security_path_link(struct dentry *old_dentry, - struct path *new_dir, + const struct path *new_dir, struct dentry *new_dentry) { return 0; } -static inline int security_path_rename(struct path *old_dir, +static inline int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, + const struct path *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index eadaa58bd6fd..2660fbcf94d1 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -280,7 +280,7 @@ static int apparmor_path_symlink(const struct path *dir, struct dentry *dentry, S_IFLNK); } -static int apparmor_path_link(struct dentry *old_dentry, struct path *new_dir, +static int apparmor_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct aa_profile *profile; @@ -295,8 +295,8 @@ static int apparmor_path_link(struct dentry *old_dentry, struct path *new_dir, return error; } -static int apparmor_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry) +static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry) { struct aa_profile *profile; int error = 0; diff --git a/security/security.c b/security/security.c index 7f62e2ed6a28..33b85a960128 100644 --- a/security/security.c +++ b/security/security.c @@ -450,7 +450,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry, return call_int_hook(path_symlink, 0, dir, dentry, old_name); } -int security_path_link(struct dentry *old_dentry, struct path *new_dir, +int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) @@ -458,8 +458,8 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry); } -int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry, +int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry, unsigned int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) || diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index d44752562b9b..6a858f2f4063 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -265,7 +265,7 @@ static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir, +static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct path path1 = { new_dir->mnt, old_dentry }; @@ -283,9 +283,9 @@ static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir, * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_rename(struct path *old_parent, +static int tomoyo_path_rename(const struct path *old_parent, struct dentry *old_dentry, - struct path *new_parent, + const struct path *new_parent, struct dentry *new_dentry) { struct path path1 = { old_parent->mnt, old_dentry }; -- cgit v1.2.3 From 77b286c0d26a5399912f5affd90ed73e2d8b42a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:28:43 -0400 Subject: constify security_path_chroot() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- security/security.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 52c2ac5f4855..e2baca48e596 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1376,7 +1376,7 @@ union security_list_options { struct dentry *new_dentry); int (*path_chmod)(const struct path *path, umode_t mode); int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot)(struct path *path); + int (*path_chroot)(const struct path *path); #endif int (*inode_alloc_security)(struct inode *inode); diff --git a/include/linux/security.h b/include/linux/security.h index 82854115e36b..cb53cffbfae4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1457,7 +1457,7 @@ int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, unsigned int flags); int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); -int security_path_chroot(struct path *path); +int security_path_chroot(const struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(const struct path *dir, struct dentry *dentry) { @@ -1518,7 +1518,7 @@ static inline int security_path_chown(const struct path *path, kuid_t uid, kgid_ return 0; } -static inline int security_path_chroot(struct path *path) +static inline int security_path_chroot(const struct path *path) { return 0; } diff --git a/security/security.c b/security/security.c index 33b85a960128..cf6f31df524a 100644 --- a/security/security.c +++ b/security/security.c @@ -499,7 +499,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) return call_int_hook(path_chown, 0, path, uid, gid); } -int security_path_chroot(struct path *path) +int security_path_chroot(const struct path *path) { return call_int_hook(path_chroot, 0, path); } diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 6a858f2f4063..c7764bb747aa 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -385,7 +385,7 @@ static int tomoyo_path_chown(const struct path *path, kuid_t uid, kgid_t gid) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_chroot(struct path *path) +static int tomoyo_path_chroot(const struct path *path) { return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path, NULL); } -- cgit v1.2.3 From 3b73b68c05db0b3c9b282c6e8e6eb71acc589a02 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:31:19 -0400 Subject: constify security_sb_pivotroot() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 6 +++--- security/security.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e2baca48e596..41c0aa6d39ea 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1346,7 +1346,7 @@ union security_list_options { int (*sb_mount)(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int (*sb_umount)(struct vfsmount *mnt, int flags); - int (*sb_pivotroot)(struct path *old_path, struct path *new_path); + int (*sb_pivotroot)(const struct path *old_path, const struct path *new_path); int (*sb_set_mnt_opts)(struct super_block *sb, struct security_mnt_opts *opts, unsigned long kern_flags, diff --git a/include/linux/security.h b/include/linux/security.h index cb53cffbfae4..fcfa211c694f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -225,7 +225,7 @@ int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int security_sb_umount(struct vfsmount *mnt, int flags); -int security_sb_pivotroot(struct path *old_path, struct path *new_path); +int security_sb_pivotroot(const struct path *old_path, const struct path *new_path); int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts, unsigned long kern_flags, @@ -542,8 +542,8 @@ static inline int security_sb_umount(struct vfsmount *mnt, int flags) return 0; } -static inline int security_sb_pivotroot(struct path *old_path, - struct path *new_path) +static inline int security_sb_pivotroot(const struct path *old_path, + const struct path *new_path) { return 0; } diff --git a/security/security.c b/security/security.c index cf6f31df524a..f7af0aaa173e 100644 --- a/security/security.c +++ b/security/security.c @@ -313,7 +313,7 @@ int security_sb_umount(struct vfsmount *mnt, int flags) return call_int_hook(sb_umount, 0, mnt, flags); } -int security_sb_pivotroot(struct path *old_path, struct path *new_path) +int security_sb_pivotroot(const struct path *old_path, const struct path *new_path) { return call_int_hook(sb_pivotroot, 0, old_path, new_path); } diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index c7764bb747aa..75c998700190 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -429,7 +429,7 @@ static int tomoyo_sb_umount(struct vfsmount *mnt, int flags) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_sb_pivotroot(struct path *old_path, struct path *new_path) +static int tomoyo_sb_pivotroot(const struct path *old_path, const struct path *new_path) { return tomoyo_path2_perm(TOMOYO_TYPE_PIVOT_ROOT, new_path, old_path); } -- cgit v1.2.3 From 3b672623079bb3e5685b8549e514f2dfaa564406 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 28 Mar 2016 13:09:56 +0900 Subject: regulator: s2mps11: Fix invalid selector mask and voltages for buck9 The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff instead of 0x1f) thus reading entire register as buck9's voltage. This effectively caused regulator core to interpret values as higher voltages than they were and then to set real voltage much lower than intended. The buck9 provides power to other regulators, including LDO13 and LDO19 which supply the MMC2 (SD card). On Odroid XU3/XU4 the lower voltage caused SD card detection errors on Odroid XU3/XU4: mmc1: card never left busy state mmc1: error -110 whilst initialising SD card During driver probe the regulator core was checking whether initial voltage matches the constraints. With incorrect vsel_mask of 0xff and default value of 0x50, the core interpreted this as 5 V which is outside of constraints (3-3.775 V). Then the regulator core was adjusting the voltage to match the constraints. With incorrect vsel_mask this new voltage mapped to a vere low voltage in the driver. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Mark Brown Cc: --- drivers/regulator/s2mps11.c | 28 ++++++++++++++++++++++------ include/linux/mfd/samsung/s2mps11.h | 2 ++ 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index d24e2c783dc5..6dfa3502e1f1 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -308,7 +308,7 @@ static struct regulator_ops s2mps11_buck_ops = { .enable_mask = S2MPS11_ENABLE_MASK \ } -#define regulator_desc_s2mps11_buck6_10(num, min, step) { \ +#define regulator_desc_s2mps11_buck67810(num, min, step) { \ .name = "BUCK"#num, \ .id = S2MPS11_BUCK##num, \ .ops = &s2mps11_buck_ops, \ @@ -324,6 +324,22 @@ static struct regulator_ops s2mps11_buck_ops = { .enable_mask = S2MPS11_ENABLE_MASK \ } +#define regulator_desc_s2mps11_buck9 { \ + .name = "BUCK9", \ + .id = S2MPS11_BUCK9, \ + .ops = &s2mps11_buck_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = MIN_3000_MV, \ + .uV_step = STEP_25_MV, \ + .n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \ + .ramp_delay = S2MPS11_RAMP_DELAY, \ + .vsel_reg = S2MPS11_REG_B9CTRL2, \ + .vsel_mask = S2MPS11_BUCK9_VSEL_MASK, \ + .enable_reg = S2MPS11_REG_B9CTRL1, \ + .enable_mask = S2MPS11_ENABLE_MASK \ +} + static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_ldo(1, STEP_25_MV), regulator_desc_s2mps11_ldo(2, STEP_50_MV), @@ -368,11 +384,11 @@ static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_buck1_4(3), regulator_desc_s2mps11_buck1_4(4), regulator_desc_s2mps11_buck5, - regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV), - regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV), + regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck9, + regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV), }; static struct regulator_ops s2mps14_reg_ops; diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index b288965e8101..2c14eeca46f0 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -173,10 +173,12 @@ enum s2mps11_regulators { #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF +#define S2MPS11_BUCK9_VSEL_MASK 0x1F #define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT) #define S2MPS11_ENABLE_SHIFT 0x06 #define S2MPS11_LDO_N_VOLTAGES (S2MPS11_LDO_VSEL_MASK + 1) #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) +#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ #define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4) -- cgit v1.2.3 From 2da62906b1e298695e1bb725927041cd59942c98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Mar 2015 21:13:46 -0400 Subject: [net] drop 'size' argument of sock_recvmsg() all callers have it equal to msg_data_left(msg). Signed-off-by: Al Viro --- drivers/target/iscsi/iscsi_target_util.c | 5 ++--- include/linux/net.h | 3 +-- net/socket.c | 23 ++++++++++------------- 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 428b0d9e3dba..57720385a751 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1283,9 +1283,8 @@ static int iscsit_do_rx_data( iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, count->iov, count->iov_count, data); - while (total_rx < data) { - rx_loop = sock_recvmsg(conn->sock, &msg, - (data - total_rx), MSG_WAITALL); + while (msg_data_left(&msg)) { + rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL); if (rx_loop <= 0) { pr_debug("rx_loop: %d total_rx: %d\n", rx_loop, total_rx); diff --git a/include/linux/net.h b/include/linux/net.h index 49175e4ced11..72c1e0622ce2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -218,8 +218,7 @@ int sock_create_lite(int family, int type, int proto, struct socket **res); struct socket *sock_alloc(void); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); -int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags); +int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); struct socket *sockfd_lookup(int fd, int *err); struct socket *sock_from_file(struct file *file, int *err); diff --git a/net/socket.c b/net/socket.c index 5f77a8e93830..956426e347af 100644 --- a/net/socket.c +++ b/net/socket.c @@ -709,17 +709,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) + int flags) { - return sock->ops->recvmsg(sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) { - int err = security_socket_recvmsg(sock, msg, size, flags); + int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); - return err ?: sock_recvmsg_nosec(sock, msg, size, flags); + return err ?: sock_recvmsg_nosec(sock, msg, flags); } EXPORT_SYMBOL(sock_recvmsg); @@ -746,7 +745,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); set_fs(KERNEL_DS); - result = sock_recvmsg(sock, msg, size, flags); + result = sock_recvmsg(sock, msg, flags); set_fs(oldfs); return result; } @@ -796,7 +795,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; - res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags); + res = sock_recvmsg(sock, &msg, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -1696,7 +1695,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); + err = sock_recvmsg(sock, &msg, flags); if (err >= 0 && addr != NULL) { err2 = move_addr_to_user(&address, @@ -2073,7 +2072,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; unsigned long cmsg_ptr; - int total_len, len; + int len; ssize_t err; /* kernel mode address */ @@ -2091,7 +2090,6 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); if (err < 0) return err; - total_len = iov_iter_count(&msg_sys->msg_iter); cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2101,8 +2099,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, - total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags); if (err < 0) goto out_freeiov; len = err; -- cgit v1.2.3 From 005e46857ed598bcf76035366cd2841e3b7f8c54 Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Thu, 17 Mar 2016 15:05:07 +0100 Subject: regulator: act8865: Pass of_node via act8865_regulator_data This makes the code easier to read and it avoids a dynamic memory allocation. Signed-off-by: Maarten ter Huurne Signed-off-by: Mark Brown --- drivers/regulator/act8865-regulator.c | 28 ++++++++++++---------------- include/linux/regulator/act8865.h | 2 ++ 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c index 69cdad0f71ba..527926b045d5 100644 --- a/drivers/regulator/act8865-regulator.c +++ b/drivers/regulator/act8865-regulator.c @@ -319,7 +319,6 @@ static struct of_regulator_match act8600_matches[] = { }; static int act8865_pdata_from_dt(struct device *dev, - struct device_node **of_node, struct act8865_platform_data *pdata, unsigned long type) { @@ -370,7 +369,7 @@ static int act8865_pdata_from_dt(struct device *dev, regulator->id = i; regulator->name = matches[i].name; regulator->init_data = matches[i].init_data; - of_node[i] = matches[i].of_node; + regulator->of_node = matches[i].of_node; regulator++; } @@ -378,7 +377,6 @@ static int act8865_pdata_from_dt(struct device *dev, } #else static inline int act8865_pdata_from_dt(struct device *dev, - struct device_node **of_node, struct act8865_platform_data *pdata, unsigned long type) { @@ -386,8 +384,8 @@ static inline int act8865_pdata_from_dt(struct device *dev, } #endif -static struct regulator_init_data -*act8865_get_init_data(int id, struct act8865_platform_data *pdata) +static struct act8865_regulator_data *act8865_get_regulator_data( + int id, struct act8865_platform_data *pdata) { int i; @@ -396,7 +394,7 @@ static struct regulator_init_data for (i = 0; i < pdata->num_regulators; i++) { if (pdata->regulators[i].id == id) - return pdata->regulators[i].init_data; + return &pdata->regulators[i]; } return NULL; @@ -418,7 +416,6 @@ static int act8865_pmic_probe(struct i2c_client *client, const struct regulator_desc *regulators; struct act8865_platform_data pdata_of, *pdata; struct device *dev = &client->dev; - struct device_node **of_node; int i, ret, num_regulators; struct act8865 *act8865; unsigned long type; @@ -472,13 +469,8 @@ static int act8865_pmic_probe(struct i2c_client *client, return -EINVAL; } - of_node = devm_kzalloc(dev, sizeof(struct device_node *) * - num_regulators, GFP_KERNEL); - if (!of_node) - return -ENOMEM; - if (dev->of_node && !pdata) { - ret = act8865_pdata_from_dt(dev, of_node, &pdata_of, type); + ret = act8865_pdata_from_dt(dev, &pdata_of, type); if (ret < 0) return ret; @@ -511,14 +503,19 @@ static int act8865_pmic_probe(struct i2c_client *client, for (i = 0; i < num_regulators; i++) { const struct regulator_desc *desc = ®ulators[i]; struct regulator_config config = { }; + struct act8865_regulator_data *rdata; struct regulator_dev *rdev; config.dev = dev; - config.init_data = act8865_get_init_data(desc->id, pdata); - config.of_node = of_node[i]; config.driver_data = act8865; config.regmap = act8865->regmap; + rdata = act8865_get_regulator_data(desc->id, pdata); + if (rdata) { + config.init_data = rdata->init_data; + config.of_node = rdata->of_node; + } + rdev = devm_regulator_register(dev, desc, &config); if (IS_ERR(rdev)) { dev_err(dev, "failed to register %s\n", desc->name); @@ -527,7 +524,6 @@ static int act8865_pmic_probe(struct i2c_client *client, } i2c_set_clientdata(client, act8865); - devm_kfree(dev, of_node); return 0; } diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h index 2eb386017fa5..113d861a1e4c 100644 --- a/include/linux/regulator/act8865.h +++ b/include/linux/regulator/act8865.h @@ -69,11 +69,13 @@ enum { * @id: regulator id * @name: regulator name * @init_data: regulator init data + * @of_node: device tree node (optional) */ struct act8865_regulator_data { int id; const char *name; struct regulator_init_data *init_data; + struct device_node *of_node; }; /** -- cgit v1.2.3 From 49db08c358873af11ba3c25401de88156fa5d365 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 19 Feb 2016 15:36:07 +0100 Subject: chrdev: emit a warning when we go below dynamic major range Currently a dynamically allocated character device major is taken from 254 and downward. This mechanism is used for RTC, IIO and a few other subsystems. The kernel currently has no check prevening these dynamic allocations from eating into the assigned numbers at 233 and downward. In a recent test it was reported that so many dynamic device majors were used on a test server, that the major number for infiniband (231) was stolen. This occurred when allocating a new major number for GPIO chips. The error messages from the kernel were not helpful. (See: https://lkml.org/lkml/2016/2/14/124) This patch adds a defined lower limit of the dynamic major allocation region will henceforth emit a warning if we start to eat into the assigned numbers. It does not do any semantic changes and will not change the kernels behaviour: numbers will still continue to be stolen, but we will know from dmesg what is going on. This also updates the Documentation/devices.txt to clearly reflect that we are using this range of major numbers for dynamic allocation. Reported-by: Ying Huang Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Alan Cox Cc: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- Documentation/devices.txt | 6 +++--- fs/char_dev.c | 4 ++++ include/linux/fs.h | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 87b4c5e82d39..0a3588a9798d 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -3099,9 +3099,9 @@ Your cooperation is appreciated. 129 = /dev/ipath_sma Device used by Subnet Management Agent 130 = /dev/ipath_diag Device used by diagnostics programs -234-239 UNASSIGNED - -240-254 char LOCAL/EXPERIMENTAL USE +234-254 char RESERVED FOR DYNAMIC ASSIGNMENT + Character devices that request a dynamic allocation of major number will + take numbers starting from 254 and downward. 240-254 block LOCAL/EXPERIMENTAL USE Allocated for local/experimental use. For devices not diff --git a/fs/char_dev.c b/fs/char_dev.c index 24b142569ca9..687471dc04a0 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -91,6 +91,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, break; } + if (i < CHRDEV_MAJOR_DYN_END) + pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range", + name, i); + if (i == 0) { ret = -EBUSY; goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..60082be96de8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2385,6 +2385,8 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 +/* Marks the bottom of the first segment of free char majors */ +#define CHRDEV_MAJOR_DYN_END 234 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, -- cgit v1.2.3 From b3c1be1b789cca6d3e39c950dfed690f0511fe76 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 22 Jan 2016 11:28:07 -0500 Subject: base: isa: Remove X86_32 dependency Many motherboards utilize a LPC to ISA bridge in order to decode ISA-style port-mapped I/O addresses. This is particularly true for embedded motherboards supporting the PC/104 bus (a bus specification derived from ISA). These motherboards are now commonly running 64-bit x86 processors. The X86_32 dependency should be removed from the ISA bus configuration option in order to support these newer motherboards. A new config option, CONFIG_ISA_BUS, is introduced to allow for the compilation of the ISA bus driver independent of the CONFIG_ISA option. Devices which communicate via ISA-compatible buses can now be supported independent of the dependencies of the CONFIG_ISA option. Signed-off-by: William Breathitt Gray Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 6 ++++++ drivers/base/Makefile | 2 +- include/linux/isa.h | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2dc18605831f..a5977986f38b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2472,10 +2472,16 @@ config ISA_DMA_API Enables ISA-style DMA support for devices requiring such controllers. If unsure, say Y. +config ISA_BUS + bool "ISA bus support" + help + Enables ISA bus support for devices requiring such controllers. + if X86_32 config ISA bool "ISA support" + depends on ISA_BUS ---help--- Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 6b2a84e7f2be..4ebfb81cc7e9 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_ISA) += isa.o +obj-$(CONFIG_ISA_BUS) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o diff --git a/include/linux/isa.h b/include/linux/isa.h index b0270e3814c8..2a02862775eb 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA +#ifdef CONFIG_ISA_BUS int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else -- cgit v1.2.3 From f235541699bcf14fb8be797c6bc1d7106df0eb64 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 22 Jan 2016 01:32:26 -0500 Subject: export.h: allow for per-symbol configurable EXPORT_SYMBOL() Similar to include/generated/autoconf.h, include/generated/autoksyms.h will contain a list of defines for each EXPORT_SYMBOL() that we want active. The format is: #define __KSYM_ 1 This list will be auto-generated with another patch. For now we only include the preprocessor magic to automatically create or omit the corresponding struct kernel_symbol declaration. Given the content of include/generated/autoksyms.h may not be known in advance, an empty file is created early on to let the build proceed. Signed-off-by: Nicolas Pitre Acked-by: Rusty Russell --- Makefile | 2 ++ include/linux/export.h | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Makefile b/Makefile index 916b26e999d8..451acbebee97 100644 --- a/Makefile +++ b/Makefile @@ -998,6 +998,8 @@ prepare2: prepare3 outputmakefile asm-generic prepare1: prepare2 $(version_h) include/generated/utsrelease.h \ include/config/auto.conf $(cmd_crmodverdir) + $(Q)test -e include/generated/autoksyms.h || \ + touch include/generated/autoksyms.h archprepare: archheaders archscripts prepare1 scripts_basic diff --git a/include/linux/export.h b/include/linux/export.h index 96e45ea463e7..77afdb2a2506 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -38,7 +38,7 @@ extern struct module __this_module; #ifdef CONFIG_MODULES -#ifndef __GENKSYMS__ +#if defined(__KERNEL__) && !defined(__GENKSYMS__) #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ @@ -53,7 +53,7 @@ extern struct module __this_module; #endif /* For every exported symbol, place a struct in the __ksymtab section */ -#define __EXPORT_SYMBOL(sym, sec) \ +#define ___EXPORT_SYMBOL(sym, sec) \ extern typeof(sym) sym; \ __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ @@ -65,6 +65,24 @@ extern struct module __this_module; __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } +#ifdef CONFIG_TRIM_UNUSED_KSYMS + +#include +#include + +#define __EXPORT_SYMBOL(sym, sec) \ + __cond_export_sym(sym, sec, config_enabled(__KSYM_##sym)) +#define __cond_export_sym(sym, sec, conf) \ + ___cond_export_sym(sym, sec, conf) +#define ___cond_export_sym(sym, sec, enabled) \ + __cond_export_sym_##enabled(sym, sec) +#define __cond_export_sym_1(sym, sec) ___EXPORT_SYMBOL(sym, sec) +#define __cond_export_sym_0(sym, sec) /* nothing */ + +#else +#define __EXPORT_SYMBOL ___EXPORT_SYMBOL +#endif + #define EXPORT_SYMBOL(sym) \ __EXPORT_SYMBOL(sym, "") -- cgit v1.2.3 From c1a95fda2a40ae8c7aad3fa44fa7718a3710eb2d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 22 Jan 2016 13:41:57 -0500 Subject: kbuild: add fine grained build dependencies for exported symbols Like with kconfig options, we now have the ability to compile in and out individual EXPORT_SYMBOL() declarations based on the content of include/generated/autoksyms.h. However we don't want the entire world to be rebuilt whenever that file is touched. Let's apply the same build dependency trick used for CONFIG_* symbols where the time stamp of empty files whose paths matching those symbols is used to trigger fine grained rebuilds. In our case the key is the symbol name passed to EXPORT_SYMBOL(). However, unlike config options, we cannot just use fixdep to parse the source code for EXPORT_SYMBOL(ksym) because several variants exist and parsing them all in a separate tool, and keeping it in synch, is not trivially maintainable. Furthermore, there are variants such as EXPORT_SYMBOL_GPL(pci_user_read_config_##size); that are instanciated via a macro for which we can't easily determine the actual exported symbol name(s) short of actually running the preprocessor on them. Storing the symbol name string in a special ELF section doesn't work for targets that output assembly or preprocessed source. So the best way is really to leverage the preprocessor by having it output actual symbol names anchored by a special sequence that can be easily filtered out. Then the list of symbols is simply fed to fixdep to be merged with the other dependencies. That implies the preprocessor is executed twice for each source file. A previous attempt relied on a warning pragma for each EXPORT_SYMBOL() instance that was filtered apart from stderr by the build system with a sed script during the actual compilation pass. Unfortunately the preprocessor/compiler diagnostic output isn't stable between versions and this solution, although more efficient, was deemed too fragile. Because of the lowercasing performed by fixdep, there might be name collisions triggering spurious rebuilds for similar symbols. But this shouldn't be a big issue in practice. (This is the case for CONFIG_* symbols and I didn't want to be different here, whatever the original reason for doing so.) To avoid needless build overhead, the exported symbol name gathering is performed only when CONFIG_TRIM_UNUSED_KSYMS is selected. Signed-off-by: Nicolas Pitre Acked-by: Rusty Russell --- include/linux/export.h | 13 ++++++++++++- scripts/Kbuild.include | 27 +++++++++++++++++++++++++++ scripts/basic/fixdep.c | 1 + 3 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 77afdb2a2506..2f9ccbe6a639 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -65,7 +65,18 @@ extern struct module __this_module; __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } -#ifdef CONFIG_TRIM_UNUSED_KSYMS +#if defined(__KSYM_DEPS__) + +/* + * For fine grained build dependencies, we want to tell the build system + * about each possible exported symbol even if they're not actually exported. + * We use a string pattern that is unlikely to be valid code that the build + * system filters out from the preprocessor output (see ksym_dep_filter + * in scripts/Kbuild.include). + */ +#define __EXPORT_SYMBOL(sym, sec) === __KSYM_##sym === + +#elif defined(CONFIG_TRIM_UNUSED_KSYMS) #include #include diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 80ca538bfba9..a09927e02713 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -258,12 +258,39 @@ if_changed_dep = $(if $(strip $(any-prereq) $(arg-check) ), \ @set -e; \ $(cmd_and_fixdep), @:) +ifndef CONFIG_TRIM_UNUSED_KSYMS + cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \ scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;\ rm -f $(depfile); \ mv -f $(dot-target).tmp $(dot-target).cmd; +else + +# Filter out exported kernel symbol names from the preprocessor output. +# See also __KSYM_DEPS__ in include/linux/export.h. +# We disable the depfile generation here, so as not to overwrite the existing +# depfile while fixdep is parsing it. +flags_nodeps = $(filter-out -Wp$(comma)-M%, $($(1))) +ksym_dep_filter = \ + case "$(1)" in \ + cc_*_c) $(CPP) $(call flags_nodeps,c_flags) -D__KSYM_DEPS__ $< ;; \ + as_*_S) $(CPP) $(call flags_nodeps,a_flags) -D__KSYM_DEPS__ $< ;; \ + boot*|build*|*cpp_lds_S|dtc|host*|vdso*) : ;; \ + *) echo "Don't know how to preprocess $(1)" >&2; false ;; \ + esac | sed -rn 's/^.*=== __KSYM_(.*) ===.*$$/KSYM_\1/p' + +cmd_and_fixdep = \ + $(echo-cmd) $(cmd_$(1)); \ + $(ksym_dep_filter) | \ + scripts/basic/fixdep -e $(depfile) $@ '$(make-cmd)' \ + > $(dot-target).tmp; \ + rm -f $(depfile); \ + mv -f $(dot-target).tmp $(dot-target).cmd; + +endif + # Usage: $(call if_changed_rule,foo) # Will check if $(cmd_foo) or any of the prerequisites changed, # and if so will execute $(rule_foo). diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 7e90a1f7de0f..746ec1ece614 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -358,6 +358,7 @@ static void parse_dep_file(void *map, size_t len) /* Ignore certain dependencies */ if (strrcmp(s, "include/generated/autoconf.h") && + strrcmp(s, "include/generated/autoksyms.h") && strrcmp(s, "arch/um/include/uml-config.h") && strrcmp(s, "include/linux/kconfig.h") && strrcmp(s, ".ver")) { -- cgit v1.2.3 From 6c96f05c8bb8bc4177613ef3c23a56b455e75887 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Feb 2016 18:46:24 +0100 Subject: reset: Make [of_]reset_control_get[_foo] functions wrappers With both the regular, _by_index and _optional variants we already have quite a few variants of [of_]reset_control_get[_foo], the upcoming addition of shared reset lines support makes this worse. This commit changes all the variants into wrappers around common core functions. For completeness sake this commit also adds a new devm_get_reset_control_by_index wrapper. Signed-off-by: Hans de Goede Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 84 +++++++-------------------------- include/linux/reset.h | 126 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 107 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index f15f150b79da..bdf1763da87a 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -136,18 +136,8 @@ int reset_control_status(struct reset_control *rstc) } EXPORT_SYMBOL_GPL(reset_control_status); -/** - * of_reset_control_get_by_index - Lookup and obtain a reference to a reset - * controller by index. - * @node: device to be reset by the controller - * @index: index of the reset controller - * - * This is to be used to perform a list of resets for a device or power domain - * in whatever order. Returns a struct reset_control or IS_ERR() condition - * containing errno. - */ -struct reset_control *of_reset_control_get_by_index(struct device_node *node, - int index) +struct reset_control *__of_reset_control_get(struct device_node *node, + const char *id, int index) { struct reset_control *rstc; struct reset_controller_dev *r, *rcdev; @@ -155,6 +145,16 @@ struct reset_control *of_reset_control_get_by_index(struct device_node *node, int rstc_id; int ret; + if (!node) + return ERR_PTR(-EINVAL); + + if (id) { + index = of_property_match_string(node, + "reset-names", id); + if (index < 0) + return ERR_PTR(-ENOENT); + } + ret = of_parse_phandle_with_args(node, "resets", "#reset-cells", index, &args); if (ret) @@ -200,49 +200,7 @@ struct reset_control *of_reset_control_get_by_index(struct device_node *node, return rstc; } -EXPORT_SYMBOL_GPL(of_reset_control_get_by_index); - -/** - * of_reset_control_get - Lookup and obtain a reference to a reset controller. - * @node: device to be reset by the controller - * @id: reset line name - * - * Returns a struct reset_control or IS_ERR() condition containing errno. - * - * Use of id names is optional. - */ -struct reset_control *of_reset_control_get(struct device_node *node, - const char *id) -{ - int index = 0; - - if (id) { - index = of_property_match_string(node, - "reset-names", id); - if (index < 0) - return ERR_PTR(-ENOENT); - } - return of_reset_control_get_by_index(node, index); -} -EXPORT_SYMBOL_GPL(of_reset_control_get); - -/** - * reset_control_get - Lookup and obtain a reference to a reset controller. - * @dev: device to be reset by the controller - * @id: reset line name - * - * Returns a struct reset_control or IS_ERR() condition containing errno. - * - * Use of id names is optional. - */ -struct reset_control *reset_control_get(struct device *dev, const char *id) -{ - if (!dev) - return ERR_PTR(-EINVAL); - - return of_reset_control_get(dev->of_node, id); -} -EXPORT_SYMBOL_GPL(reset_control_get); +EXPORT_SYMBOL_GPL(__of_reset_control_get); /** * reset_control_put - free the reset controller @@ -264,16 +222,8 @@ static void devm_reset_control_release(struct device *dev, void *res) reset_control_put(*(struct reset_control **)res); } -/** - * devm_reset_control_get - resource managed reset_control_get() - * @dev: device to be reset by the controller - * @id: reset line name - * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. - */ -struct reset_control *devm_reset_control_get(struct device *dev, const char *id) +struct reset_control *__devm_reset_control_get(struct device *dev, + const char *id, int index) { struct reset_control **ptr, *rstc; @@ -282,7 +232,7 @@ struct reset_control *devm_reset_control_get(struct device *dev, const char *id) if (!ptr) return ERR_PTR(-ENOMEM); - rstc = reset_control_get(dev, id); + rstc = __of_reset_control_get(dev ? dev->of_node : NULL, id, index); if (!IS_ERR(rstc)) { *ptr = rstc; devres_add(dev, ptr); @@ -292,7 +242,7 @@ struct reset_control *devm_reset_control_get(struct device *dev, const char *id) return rstc; } -EXPORT_SYMBOL_GPL(devm_reset_control_get); +EXPORT_SYMBOL_GPL(__devm_reset_control_get); /** * device_reset - find reset controller associated with the device diff --git a/include/linux/reset.h b/include/linux/reset.h index c4c097de0ba9..1bb69a29d6db 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -1,8 +1,8 @@ #ifndef _LINUX_RESET_H_ #define _LINUX_RESET_H_ -struct device; -struct device_node; +#include + struct reset_control; #ifdef CONFIG_RESET_CONTROLLER @@ -12,9 +12,11 @@ int reset_control_assert(struct reset_control *rstc); int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); -struct reset_control *reset_control_get(struct device *dev, const char *id); +struct reset_control *__of_reset_control_get(struct device_node *node, + const char *id, int index); void reset_control_put(struct reset_control *rstc); -struct reset_control *devm_reset_control_get(struct device *dev, const char *id); +struct reset_control *__devm_reset_control_get(struct device *dev, + const char *id, int index); int __must_check device_reset(struct device *dev); @@ -23,24 +25,6 @@ static inline int device_reset_optional(struct device *dev) return device_reset(dev); } -static inline struct reset_control *reset_control_get_optional( - struct device *dev, const char *id) -{ - return reset_control_get(dev, id); -} - -static inline struct reset_control *devm_reset_control_get_optional( - struct device *dev, const char *id) -{ - return devm_reset_control_get(dev, id); -} - -struct reset_control *of_reset_control_get(struct device_node *node, - const char *id); - -struct reset_control *of_reset_control_get_by_index( - struct device_node *node, int index); - #else static inline int reset_control_reset(struct reset_control *rstc) @@ -77,44 +61,114 @@ static inline int device_reset_optional(struct device *dev) return -ENOTSUPP; } -static inline struct reset_control *__must_check reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control *__of_reset_control_get( + struct device_node *node, + const char *id, int index) { - WARN_ON(1); return ERR_PTR(-EINVAL); } -static inline struct reset_control *__must_check devm_reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control *__devm_reset_control_get( + struct device *dev, + const char *id, int index) { - WARN_ON(1); return ERR_PTR(-EINVAL); } -static inline struct reset_control *reset_control_get_optional( +#endif /* CONFIG_RESET_CONTROLLER */ + +/** + * reset_control_get - Lookup and obtain a reference to a reset controller. + * @dev: device to be reset by the controller + * @id: reset line name + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ +static inline struct reset_control *__must_check reset_control_get( struct device *dev, const char *id) { - return ERR_PTR(-ENOTSUPP); +#ifndef CONFIG_RESET_CONTROLLER + WARN_ON(1); +#endif + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); } -static inline struct reset_control *devm_reset_control_get_optional( +static inline struct reset_control *reset_control_get_optional( struct device *dev, const char *id) { - return ERR_PTR(-ENOTSUPP); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); } +/** + * of_reset_control_get - Lookup and obtain a reference to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ static inline struct reset_control *of_reset_control_get( struct device_node *node, const char *id) { - return ERR_PTR(-ENOTSUPP); + return __of_reset_control_get(node, id, 0); } +/** + * of_reset_control_get_by_index - Lookup and obtain a reference to a reset + * controller by index. + * @node: device to be reset by the controller + * @index: index of the reset controller + * + * This is to be used to perform a list of resets for a device or power domain + * in whatever order. Returns a struct reset_control or IS_ERR() condition + * containing errno. + */ static inline struct reset_control *of_reset_control_get_by_index( - struct device_node *node, int index) + struct device_node *node, int index) { - return ERR_PTR(-ENOTSUPP); + return __of_reset_control_get(node, NULL, index); } -#endif /* CONFIG_RESET_CONTROLLER */ +/** + * devm_reset_control_get - resource managed reset_control_get() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get(). For reset controllers returned from this + * function, reset_control_put() is called automatically on driver detach. + * See reset_control_get() for more information. + */ +static inline struct reset_control *__must_check devm_reset_control_get( + struct device *dev, const char *id) +{ +#ifndef CONFIG_RESET_CONTROLLER + WARN_ON(1); +#endif + return __devm_reset_control_get(dev, id, 0); +} + +static inline struct reset_control *devm_reset_control_get_optional( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0); +} + +/** + * devm_reset_control_get_by_index - resource managed reset_control_get + * @dev: device to be reset by the controller + * @index: index of the reset controller + * + * Managed reset_control_get(). For reset controllers returned from this + * function, reset_control_put() is called automatically on driver detach. + * See reset_control_get() for more information. + */ +static inline struct reset_control *devm_reset_control_get_by_index( + struct device *dev, int index) +{ + return __devm_reset_control_get(dev, NULL, index); +} #endif -- cgit v1.2.3 From c15ddec2ca06076a11195313aa1fce47d2a28c5d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Feb 2016 18:46:25 +0100 Subject: reset: Share struct reset_control between reset_control_get calls Now that struct reset_control no longer stores the device pointer for the device calling reset_control_get we can share a single struct reset_control when multiple calls to reset_control_get are made for the same reset line (same id / index). This is a preparation patch for adding support for shared reset lines. Signed-off-by: Hans de Goede Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 84 ++++++++++++++++++++++++++++++---------- include/linux/reset-controller.h | 2 + 2 files changed, 65 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index bdf1763da87a..957750600ff3 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -18,19 +18,23 @@ #include #include -static DEFINE_MUTEX(reset_controller_list_mutex); +static DEFINE_MUTEX(reset_list_mutex); static LIST_HEAD(reset_controller_list); /** * struct reset_control - a reset control * @rcdev: a pointer to the reset controller device * this reset control belongs to + * @list: list entry for the rcdev's reset controller list * @id: ID of the reset controller in the reset * controller device + * @refcnt: Number of gets of this reset_control */ struct reset_control { struct reset_controller_dev *rcdev; + struct list_head list; unsigned int id; + unsigned int refcnt; }; /** @@ -62,9 +66,11 @@ int reset_controller_register(struct reset_controller_dev *rcdev) rcdev->of_xlate = of_reset_simple_xlate; } - mutex_lock(&reset_controller_list_mutex); + INIT_LIST_HEAD(&rcdev->reset_control_head); + + mutex_lock(&reset_list_mutex); list_add(&rcdev->list, &reset_controller_list); - mutex_unlock(&reset_controller_list_mutex); + mutex_unlock(&reset_list_mutex); return 0; } @@ -76,9 +82,9 @@ EXPORT_SYMBOL_GPL(reset_controller_register); */ void reset_controller_unregister(struct reset_controller_dev *rcdev) { - mutex_lock(&reset_controller_list_mutex); + mutex_lock(&reset_list_mutex); list_del(&rcdev->list); - mutex_unlock(&reset_controller_list_mutex); + mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_controller_unregister); @@ -136,6 +142,48 @@ int reset_control_status(struct reset_control *rstc) } EXPORT_SYMBOL_GPL(reset_control_status); +static struct reset_control *__reset_control_get( + struct reset_controller_dev *rcdev, + unsigned int index) +{ + struct reset_control *rstc; + + lockdep_assert_held(&reset_list_mutex); + + list_for_each_entry(rstc, &rcdev->reset_control_head, list) { + if (rstc->id == index) { + rstc->refcnt++; + return rstc; + } + } + + rstc = kzalloc(sizeof(*rstc), GFP_KERNEL); + if (!rstc) + return ERR_PTR(-ENOMEM); + + try_module_get(rcdev->owner); + + rstc->rcdev = rcdev; + list_add(&rstc->list, &rcdev->reset_control_head); + rstc->id = index; + rstc->refcnt = 1; + + return rstc; +} + +static void __reset_control_put(struct reset_control *rstc) +{ + lockdep_assert_held(&reset_list_mutex); + + if (--rstc->refcnt) + return; + + module_put(rstc->rcdev->owner); + + list_del(&rstc->list); + kfree(rstc); +} + struct reset_control *__of_reset_control_get(struct device_node *node, const char *id, int index) { @@ -160,7 +208,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node, if (ret) return ERR_PTR(ret); - mutex_lock(&reset_controller_list_mutex); + mutex_lock(&reset_list_mutex); rcdev = NULL; list_for_each_entry(r, &reset_controller_list, list) { if (args.np == r->of_node) { @@ -171,32 +219,25 @@ struct reset_control *__of_reset_control_get(struct device_node *node, of_node_put(args.np); if (!rcdev) { - mutex_unlock(&reset_controller_list_mutex); + mutex_unlock(&reset_list_mutex); return ERR_PTR(-EPROBE_DEFER); } if (WARN_ON(args.args_count != rcdev->of_reset_n_cells)) { - mutex_unlock(&reset_controller_list_mutex); + mutex_unlock(&reset_list_mutex); return ERR_PTR(-EINVAL); } rstc_id = rcdev->of_xlate(rcdev, &args); if (rstc_id < 0) { - mutex_unlock(&reset_controller_list_mutex); + mutex_unlock(&reset_list_mutex); return ERR_PTR(rstc_id); } - try_module_get(rcdev->owner); - mutex_unlock(&reset_controller_list_mutex); - - rstc = kzalloc(sizeof(*rstc), GFP_KERNEL); - if (!rstc) { - module_put(rcdev->owner); - return ERR_PTR(-ENOMEM); - } + /* reset_list_mutex also protects the rcdev's reset_control list */ + rstc = __reset_control_get(rcdev, rstc_id); - rstc->rcdev = rcdev; - rstc->id = rstc_id; + mutex_unlock(&reset_list_mutex); return rstc; } @@ -212,8 +253,9 @@ void reset_control_put(struct reset_control *rstc) if (IS_ERR(rstc)) return; - module_put(rstc->rcdev->owner); - kfree(rstc); + mutex_lock(&reset_list_mutex); + __reset_control_put(rstc); + mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_control_put); diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index a3a5bcdb1d02..b91ba932bbd4 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -31,6 +31,7 @@ struct of_phandle_args; * @ops: a pointer to device specific struct reset_control_ops * @owner: kernel module of the reset controller driver * @list: internal list of reset controller devices + * @reset_control_head: head of internal list of requested reset controls * @of_node: corresponding device tree node as phandle target * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the @@ -41,6 +42,7 @@ struct reset_controller_dev { const struct reset_control_ops *ops; struct module *owner; struct list_head list; + struct list_head reset_control_head; struct device_node *of_node; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, -- cgit v1.2.3 From 0b52297f2288ca239e598afe6c92db83d8d2bfcd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Feb 2016 18:46:26 +0100 Subject: reset: Add support for shared reset controls In some SoCs some hw-blocks share a reset control. Add support for this setup by adding new: reset_control_get_shared() devm_reset_control_get_shared() devm_reset_control_get_shared_by_index() methods to get a reset_control. Note that this patch omits adding of_ variants, if these are needed later they can be easily added. This patch also changes the behavior of the existing exclusive reset_control_get() variants, if these are now called more then once for the same reset_control they will return -EBUSY. To catch existing drivers triggering this error (there should not be any) a WARN_ON(1) is added in this path. When a reset_control is shared, the behavior of reset_control_assert / deassert is changed, for shared reset_controls these will work like the clock-enable/disable and regulator-on/off functions. They will keep a deassert_count, and only (re-)assert the reset after reset_control_assert has been called as many times as reset_control_deassert was called. Calling reset_control_assert without first calling reset_control_deassert is not allowed on a shared reset control. Calling reset_control_reset is also not allowed on a shared reset control. Signed-off-by: Hans de Goede Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 59 +++++++++++++++++++++++++------ include/linux/reset.h | 96 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 129 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 957750600ff3..72b32bd15549 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -8,6 +8,7 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ +#include #include #include #include @@ -29,12 +30,16 @@ static LIST_HEAD(reset_controller_list); * @id: ID of the reset controller in the reset * controller device * @refcnt: Number of gets of this reset_control + * @shared: Is this a shared (1), or an exclusive (0) reset_control? + * @deassert_cnt: Number of times this reset line has been deasserted */ struct reset_control { struct reset_controller_dev *rcdev; struct list_head list; unsigned int id; unsigned int refcnt; + int shared; + atomic_t deassert_count; }; /** @@ -91,9 +96,14 @@ EXPORT_SYMBOL_GPL(reset_controller_unregister); /** * reset_control_reset - reset the controlled device * @rstc: reset controller + * + * Calling this on a shared reset controller is an error. */ int reset_control_reset(struct reset_control *rstc) { + if (WARN_ON(rstc->shared)) + return -EINVAL; + if (rstc->rcdev->ops->reset) return rstc->rcdev->ops->reset(rstc->rcdev, rstc->id); @@ -104,26 +114,48 @@ EXPORT_SYMBOL_GPL(reset_control_reset); /** * reset_control_assert - asserts the reset line * @rstc: reset controller + * + * Calling this on an exclusive reset controller guarantees that the reset + * will be asserted. When called on a shared reset controller the line may + * still be deasserted, as long as other users keep it so. + * + * For shared reset controls a driver cannot expect the hw's registers and + * internal state to be reset, but must be prepared for this to happen. */ int reset_control_assert(struct reset_control *rstc) { - if (rstc->rcdev->ops->assert) - return rstc->rcdev->ops->assert(rstc->rcdev, rstc->id); + if (!rstc->rcdev->ops->assert) + return -ENOTSUPP; - return -ENOTSUPP; + if (rstc->shared) { + if (WARN_ON(atomic_read(&rstc->deassert_count) == 0)) + return -EINVAL; + + if (atomic_dec_return(&rstc->deassert_count) != 0) + return 0; + } + + return rstc->rcdev->ops->assert(rstc->rcdev, rstc->id); } EXPORT_SYMBOL_GPL(reset_control_assert); /** * reset_control_deassert - deasserts the reset line * @rstc: reset controller + * + * After calling this function, the reset is guaranteed to be deasserted. */ int reset_control_deassert(struct reset_control *rstc) { - if (rstc->rcdev->ops->deassert) - return rstc->rcdev->ops->deassert(rstc->rcdev, rstc->id); + if (!rstc->rcdev->ops->deassert) + return -ENOTSUPP; - return -ENOTSUPP; + if (rstc->shared) { + if (atomic_inc_return(&rstc->deassert_count) != 1) + return 0; + } + + return rstc->rcdev->ops->deassert(rstc->rcdev, rstc->id); } EXPORT_SYMBOL_GPL(reset_control_deassert); @@ -144,7 +176,7 @@ EXPORT_SYMBOL_GPL(reset_control_status); static struct reset_control *__reset_control_get( struct reset_controller_dev *rcdev, - unsigned int index) + unsigned int index, int shared) { struct reset_control *rstc; @@ -152,6 +184,9 @@ static struct reset_control *__reset_control_get( list_for_each_entry(rstc, &rcdev->reset_control_head, list) { if (rstc->id == index) { + if (WARN_ON(!rstc->shared || !shared)) + return ERR_PTR(-EBUSY); + rstc->refcnt++; return rstc; } @@ -167,6 +202,7 @@ static struct reset_control *__reset_control_get( list_add(&rstc->list, &rcdev->reset_control_head); rstc->id = index; rstc->refcnt = 1; + rstc->shared = shared; return rstc; } @@ -185,7 +221,7 @@ static void __reset_control_put(struct reset_control *rstc) } struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index) + const char *id, int index, int shared) { struct reset_control *rstc; struct reset_controller_dev *r, *rcdev; @@ -235,7 +271,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } /* reset_list_mutex also protects the rcdev's reset_control list */ - rstc = __reset_control_get(rcdev, rstc_id); + rstc = __reset_control_get(rcdev, rstc_id, shared); mutex_unlock(&reset_list_mutex); @@ -265,7 +301,7 @@ static void devm_reset_control_release(struct device *dev, void *res) } struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index) + const char *id, int index, int shared) { struct reset_control **ptr, *rstc; @@ -274,7 +310,8 @@ struct reset_control *__devm_reset_control_get(struct device *dev, if (!ptr) return ERR_PTR(-ENOMEM); - rstc = __of_reset_control_get(dev ? dev->of_node : NULL, id, index); + rstc = __of_reset_control_get(dev ? dev->of_node : NULL, + id, index, shared); if (!IS_ERR(rstc)) { *ptr = rstc; devres_add(dev, ptr); diff --git a/include/linux/reset.h b/include/linux/reset.h index 1bb69a29d6db..a552134a209e 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -13,10 +13,10 @@ int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index); + const char *id, int index, int shared); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index); + const char *id, int index, int shared); int __must_check device_reset(struct device *dev); @@ -63,14 +63,14 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index) + const char *id, int index, int shared) { return ERR_PTR(-EINVAL); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, - const char *id, int index) + const char *id, int index, int shared) { return ERR_PTR(-EINVAL); } @@ -78,11 +78,17 @@ static inline struct reset_control *__devm_reset_control_get( #endif /* CONFIG_RESET_CONTROLLER */ /** - * reset_control_get - Lookup and obtain a reference to a reset controller. + * reset_control_get - Lookup and obtain an exclusive reference to a + * reset controller. * @dev: device to be reset by the controller * @id: reset line name * * Returns a struct reset_control or IS_ERR() condition containing errno. + * If this function is called more then once for the same reset_control it will + * return -EBUSY. + * + * See reset_control_get_shared for details on shared references to + * reset-controls. * * Use of id names is optional. */ @@ -92,17 +98,46 @@ static inline struct reset_control *__must_check reset_control_get( #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } static inline struct reset_control *reset_control_get_optional( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } /** - * of_reset_control_get - Lookup and obtain a reference to a reset controller. + * reset_control_get_shared - Lookup and obtain a shared reference to a + * reset controller. + * @dev: device to be reset by the controller + * @id: reset line name + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * This function is intended for use with reset-controls which are shared + * between hardware-blocks. + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * + * Use of id names is optional. + */ +static inline struct reset_control *reset_control_get_shared( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); +} + +/** + * of_reset_control_get - Lookup and obtain an exclusive reference to a + * reset controller. * @node: device to be reset by the controller * @id: reset line name * @@ -113,12 +148,12 @@ static inline struct reset_control *reset_control_get_optional( static inline struct reset_control *of_reset_control_get( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0); + return __of_reset_control_get(node, id, 0, 0); } /** - * of_reset_control_get_by_index - Lookup and obtain a reference to a reset - * controller by index. + * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to + * a reset controller by index. * @node: device to be reset by the controller * @index: index of the reset controller * @@ -129,7 +164,7 @@ static inline struct reset_control *of_reset_control_get( static inline struct reset_control *of_reset_control_get_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index); + return __of_reset_control_get(node, NULL, index, 0); } /** @@ -147,13 +182,13 @@ static inline struct reset_control *__must_check devm_reset_control_get( #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __devm_reset_control_get(dev, id, 0); + return __devm_reset_control_get(dev, id, 0, 0); } static inline struct reset_control *devm_reset_control_get_optional( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0); + return __devm_reset_control_get(dev, id, 0, 0); } /** @@ -168,7 +203,38 @@ static inline struct reset_control *devm_reset_control_get_optional( static inline struct reset_control *devm_reset_control_get_by_index( struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index); + return __devm_reset_control_get(dev, NULL, index, 0); +} + +/** + * devm_reset_control_get_shared - resource managed reset_control_get_shared() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver detach. + * See reset_control_get_shared() for more information. + */ +static inline struct reset_control *devm_reset_control_get_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + +/** + * devm_reset_control_get_shared_by_index - resource managed + * reset_control_get_shared + * @dev: device to be reset by the controller + * @index: index of the reset controller + * + * Managed reset_control_get_shared(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver detach. + * See reset_control_get_shared() for more information. + */ +static inline struct reset_control *devm_reset_control_get_shared_by_index( + struct device *dev, int index) +{ + return __devm_reset_control_get(dev, NULL, index, 1); } #endif -- cgit v1.2.3 From 44debe7a123cc760fc90ccbe253210798c917fa7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Mar 2016 11:26:35 +0200 Subject: vgacon: dummy implementation for vgacon_text_force MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to ditch a ton of ugly #ifdefs from a bunch of drm modeset drivers. v2: Make the dummy function actually return a sane value, spotted by Ville. v3: Because the patch is still in limbo there's no more drivers to convert, noticed by Emil. v4: Rebase once more, because hooray. I'll just go ahead an apply this one later on to drm-misc. Cc: Emil Velikov Cc: Ville Syrjälä Cc: Andrew Morton Cc: Greg Kroah-Hartman Reviewed-by: Emil Velikov Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- drivers/gpu/drm/ast/ast_drv.c | 2 -- drivers/gpu/drm/cirrus/cirrus_drv.c | 2 -- drivers/gpu/drm/i915/i915_drv.c | 2 -- drivers/gpu/drm/mgag200/mgag200_drv.c | 2 -- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 -- drivers/gpu/drm/qxl/qxl_drv.c | 2 -- drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/virtio/virtgpu_drv.c | 2 -- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 -- include/linux/console.h | 2 ++ 11 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ce79a8b605a0..fba20bd59cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -539,12 +539,10 @@ static struct pci_driver amdgpu_kms_pci_driver = { static int __init amdgpu_init(void) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; } -#endif DRM_INFO("amdgpu kernel modesetting enabled.\n"); driver = &kms_driver; pdriver = &amdgpu_kms_pci_driver; diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 9a32d9dfdd26..fcd9c0714836 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -218,10 +218,8 @@ static struct drm_driver driver = { static int __init ast_init(void) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && ast_modeset == -1) return -EINVAL; -#endif if (ast_modeset == 0) return -EINVAL; diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c index b1619e29a564..b394e6d8f01e 100644 --- a/drivers/gpu/drm/cirrus/cirrus_drv.c +++ b/drivers/gpu/drm/cirrus/cirrus_drv.c @@ -162,10 +162,8 @@ static struct pci_driver cirrus_pci_driver = { static int __init cirrus_init(void) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && cirrus_modeset == -1) return -EINVAL; -#endif if (cirrus_modeset == 0) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 44912ecebc1a..8a62690e6513 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1750,10 +1750,8 @@ static int __init i915_init(void) if (i915.modeset == 0) driver.driver_features &= ~DRIVER_MODESET; -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && i915.modeset == -1) driver.driver_features &= ~DRIVER_MODESET; -#endif if (!(driver.driver_features & DRIVER_MODESET)) { /* Silently fail loading to not upset userspace. */ diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c index b0af77454d52..ebb470ff7200 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.c +++ b/drivers/gpu/drm/mgag200/mgag200_drv.c @@ -116,10 +116,8 @@ static struct pci_driver mgag200_pci_driver = { static int __init mgag200_init(void) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && mgag200_modeset == -1) return -EINVAL; -#endif if (mgag200_modeset == 0) return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index bb8498c9b13e..731c5c2a8933 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1082,10 +1082,8 @@ nouveau_drm_init(void) nouveau_display_options(); if (nouveau_modeset == -1) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force()) nouveau_modeset = 0; -#endif } if (!nouveau_modeset) diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 7307b07fe06b..dc9df5fe50ba 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -272,10 +272,8 @@ static struct drm_driver qxl_driver = { static int __init qxl_init(void) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && qxl_modeset == -1) return -EINVAL; -#endif if (qxl_modeset == 0) return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index cad25557650f..ad136fc081c8 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -558,12 +558,10 @@ static struct pci_driver radeon_kms_pci_driver = { static int __init radeon_init(void) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && radeon_modeset == -1) { DRM_INFO("VGACON disable radeon kernel modesetting.\n"); radeon_modeset = 0; } -#endif /* set to modesetting by default if not nomodeset */ if (radeon_modeset == -1) radeon_modeset = 1; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 7f898cfdc746..3cc7afa77a35 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -42,10 +42,8 @@ module_param_named(modeset, virtio_gpu_modeset, int, 0400); static int virtio_gpu_probe(struct virtio_device *vdev) { -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force() && virtio_gpu_modeset == -1) return -EINVAL; -#endif if (virtio_gpu_modeset == 0) return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 0ee76e523a90..fa10395e2a18 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1529,10 +1529,8 @@ static int __init vmwgfx_init(void) { int ret; -#ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force()) return -EINVAL; -#endif ret = drm_pci_init(&driver, &vmw_pci_driver); if (ret) diff --git a/include/linux/console.h b/include/linux/console.h index ea731af2451e..e49cc1ef19be 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -191,6 +191,8 @@ void vcs_remove_sysfs(int index); #ifdef CONFIG_VGA_CONSOLE extern bool vgacon_text_force(void); +#else +static inline bool vgacon_text_force(void) { return false; } #endif #endif /* _LINUX_CONSOLE_H */ -- cgit v1.2.3 From 6a0028b3dd67b86d7265ed873c8738743adec855 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 30 Mar 2016 12:04:30 -0700 Subject: regulator: Deprecate regulator_can_change_voltage() All current users of regulator_can_change_voltage() are abusing it, using it to wrap a call to regulator_set_voltage() on probe without any alternative handling for fixed voltages. Drivers should only be using regulator_set_voltage() if they need to vary voltages at runtime, fixed voltages should normally be set via machine constraints, and calling regulator_set_voltage() on a regulator which can't be varied will succeed if the current voltage is within the range requested so users shouldn't worry if they have permission to vary normally. Deprecate the API to try to stop any new users appearing while we fix the current callers. Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 48603506f8de..80dc4e51d14a 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -224,7 +224,7 @@ int regulator_bulk_force_disable(int num_consumers, void regulator_bulk_free(int num_consumers, struct regulator_bulk_data *consumers); -int regulator_can_change_voltage(struct regulator *regulator); +int __deprecated regulator_can_change_voltage(struct regulator *regulator); int regulator_count_voltages(struct regulator *regulator); int regulator_list_voltage(struct regulator *regulator, unsigned selector); int regulator_is_supported_voltage(struct regulator *regulator, @@ -436,7 +436,7 @@ static inline void regulator_bulk_free(int num_consumers, { } -static inline int regulator_can_change_voltage(struct regulator *regulator) +static inline int __deprecated regulator_can_change_voltage(struct regulator *regulator) { return 0; } -- cgit v1.2.3 From e8b123e6008480b2b8d80dae060315d84b79f4bb Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 24 Dec 2015 00:28:38 -0800 Subject: soc: qcom: smem_state: Add stubs for disabled smem_state Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smem_state.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smem_state.h b/include/linux/soc/qcom/smem_state.h index f35e1512fcaa..7b88697929e9 100644 --- a/include/linux/soc/qcom/smem_state.h +++ b/include/linux/soc/qcom/smem_state.h @@ -1,12 +1,17 @@ #ifndef __QCOM_SMEM_STATE__ #define __QCOM_SMEM_STATE__ +#include + +struct device_node; struct qcom_smem_state; struct qcom_smem_state_ops { int (*update_bits)(void *, u32, u32); }; +#ifdef CONFIG_QCOM_SMEM_STATE + struct qcom_smem_state *qcom_smem_state_get(struct device *dev, const char *con_id, unsigned *bit); void qcom_smem_state_put(struct qcom_smem_state *); @@ -15,4 +20,34 @@ int qcom_smem_state_update_bits(struct qcom_smem_state *state, u32 mask, u32 val struct qcom_smem_state *qcom_smem_state_register(struct device_node *of_node, const struct qcom_smem_state_ops *ops, void *data); void qcom_smem_state_unregister(struct qcom_smem_state *state); +#else + +static inline struct qcom_smem_state *qcom_smem_state_get(struct device *dev, + const char *con_id, unsigned *bit) +{ + return ERR_PTR(-EINVAL); +} + +static inline void qcom_smem_state_put(struct qcom_smem_state *state) +{ +} + +static inline int qcom_smem_state_update_bits(struct qcom_smem_state *state, + u32 mask, u32 value) +{ + return -EINVAL; +} + +static inline struct qcom_smem_state *qcom_smem_state_register(struct device_node *of_node, + const struct qcom_smem_state_ops *ops, void *data) +{ + return ERR_PTR(-EINVAL); +} + +static inline void qcom_smem_state_unregister(struct qcom_smem_state *state) +{ +} + +#endif + #endif -- cgit v1.2.3 From 39f0db298e7c02a29371fb39cabdd5d76e6b726c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 17 Feb 2016 22:39:02 -0800 Subject: soc: qcom: smd: Introduce callback setter Introduce a setter for the callback function pointer to clarify the locking around the operation and to reduce some duplication. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/smd.c | 25 +++++++++++++++++-------- include/linux/soc/qcom/smd.h | 4 +++- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c index 498fd0581a45..c357842b92e1 100644 --- a/drivers/soc/qcom/smd.c +++ b/drivers/soc/qcom/smd.c @@ -186,7 +186,7 @@ struct qcom_smd_channel { int fifo_size; void *bounce_buffer; - int (*cb)(struct qcom_smd_device *, const void *, size_t); + qcom_smd_cb_t cb; spinlock_t recv_lock; @@ -377,6 +377,19 @@ static void qcom_smd_channel_reset(struct qcom_smd_channel *channel) channel->pkt_size = 0; } +/* + * Set the callback for a channel, with appropriate locking + */ +static void qcom_smd_channel_set_callback(struct qcom_smd_channel *channel, + qcom_smd_cb_t cb) +{ + unsigned long flags; + + spin_lock_irqsave(&channel->recv_lock, flags); + channel->cb = cb; + spin_unlock_irqrestore(&channel->recv_lock, flags); +}; + /* * Calculate the amount of data available in the rx fifo */ @@ -814,8 +827,7 @@ static int qcom_smd_dev_probe(struct device *dev) if (!channel->bounce_buffer) return -ENOMEM; - channel->cb = qsdrv->callback; - + qcom_smd_channel_set_callback(channel, qsdrv->callback); qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENING); qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENED); @@ -831,7 +843,7 @@ static int qcom_smd_dev_probe(struct device *dev) err: dev_err(&qsdev->dev, "probe failed\n"); - channel->cb = NULL; + qcom_smd_channel_set_callback(channel, NULL); kfree(channel->bounce_buffer); channel->bounce_buffer = NULL; @@ -850,16 +862,13 @@ static int qcom_smd_dev_remove(struct device *dev) struct qcom_smd_device *qsdev = to_smd_device(dev); struct qcom_smd_driver *qsdrv = to_smd_driver(dev); struct qcom_smd_channel *channel = qsdev->channel; - unsigned long flags; qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSING); /* * Make sure we don't race with the code receiving data. */ - spin_lock_irqsave(&channel->recv_lock, flags); - channel->cb = NULL; - spin_unlock_irqrestore(&channel->recv_lock, flags); + qcom_smd_channel_set_callback(channel, NULL); /* Wake up any sleepers in qcom_smd_send() */ wake_up_interruptible(&channel->fblockread_event); diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index d0cb6d189a0a..65a64fcdb1aa 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -26,6 +26,8 @@ struct qcom_smd_device { struct qcom_smd_channel *channel; }; +typedef int (*qcom_smd_cb_t)(struct qcom_smd_device *, const void *, size_t); + /** * struct qcom_smd_driver - smd driver struct * @driver: underlying device driver @@ -42,7 +44,7 @@ struct qcom_smd_driver { int (*probe)(struct qcom_smd_device *dev); void (*remove)(struct qcom_smd_device *dev); - int (*callback)(struct qcom_smd_device *, const void *, size_t); + qcom_smd_cb_t callback; }; int qcom_smd_driver_register(struct qcom_smd_driver *drv); -- cgit v1.2.3 From 028021d29ea069390e1f60c6aa5b3511d218454b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 17 Feb 2016 22:39:06 -0800 Subject: soc: qcom: smd: Support opening additional channels With the qcom_smd_open_channel() API we allow SMD devices to open additional SMD channels, to allow implementation of multi-channel SMD devices - like Bluetooth. Channels are opened from the same edge as the calling SMD device is tied to. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/smd.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/soc/qcom/smd.h | 4 +++ 2 files changed, 80 insertions(+) (limited to 'include/linux') diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c index c3fa0fd724f7..b6434c4be86a 100644 --- a/drivers/soc/qcom/smd.c +++ b/drivers/soc/qcom/smd.c @@ -129,6 +129,8 @@ struct qcom_smd_edge { unsigned smem_available; + wait_queue_head_t new_channel_event; + struct work_struct scan_work; struct work_struct state_work; }; @@ -1042,6 +1044,77 @@ void qcom_smd_driver_unregister(struct qcom_smd_driver *qsdrv) } EXPORT_SYMBOL(qcom_smd_driver_unregister); +static struct qcom_smd_channel * +qcom_smd_find_channel(struct qcom_smd_edge *edge, const char *name) +{ + struct qcom_smd_channel *channel; + struct qcom_smd_channel *ret = NULL; + unsigned long flags; + unsigned state; + + spin_lock_irqsave(&edge->channels_lock, flags); + list_for_each_entry(channel, &edge->channels, list) { + if (strcmp(channel->name, name)) + continue; + + state = GET_RX_CHANNEL_INFO(channel, state); + if (state != SMD_CHANNEL_OPENING && + state != SMD_CHANNEL_OPENED) + continue; + + ret = channel; + break; + } + spin_unlock_irqrestore(&edge->channels_lock, flags); + + return ret; +} + +/** + * qcom_smd_open_channel() - claim additional channels on the same edge + * @sdev: smd_device handle + * @name: channel name + * @cb: callback method to use for incoming data + * + * Returns a channel handle on success, or -EPROBE_DEFER if the channel isn't + * ready. + */ +struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_device *sdev, + const char *name, + qcom_smd_cb_t cb) +{ + struct qcom_smd_channel *channel; + struct qcom_smd_edge *edge = sdev->channel->edge; + int ret; + + /* Wait up to HZ for the channel to appear */ + ret = wait_event_interruptible_timeout(edge->new_channel_event, + (channel = qcom_smd_find_channel(edge, name)) != NULL, + HZ); + if (!ret) + return ERR_PTR(-ETIMEDOUT); + + if (channel->state != SMD_CHANNEL_CLOSED) { + dev_err(&sdev->dev, "channel %s is busy\n", channel->name); + return ERR_PTR(-EBUSY); + } + + channel->qsdev = sdev; + ret = qcom_smd_channel_open(channel, cb); + if (ret) { + channel->qsdev = NULL; + return ERR_PTR(ret); + } + + /* + * Append the list of channel to the channels associated with the sdev + */ + list_add_tail(&channel->dev_list, &sdev->channel->dev_list); + + return channel; +} +EXPORT_SYMBOL(qcom_smd_open_channel); + /* * Allocate the qcom_smd_channel object for a newly found smd channel, * retrieving and validating the smem items involved. @@ -1178,6 +1251,8 @@ static void qcom_channel_scan_worker(struct work_struct *work) dev_dbg(smd->dev, "new channel found: '%s'\n", channel->name); set_bit(i, edge->allocated[tbl]); + + wake_up_interruptible(&edge->new_channel_event); } } @@ -1341,6 +1416,7 @@ static int qcom_smd_probe(struct platform_device *pdev) for_each_available_child_of_node(pdev->dev.of_node, node) { edge = &smd->edges[i++]; edge->smd = smd; + init_waitqueue_head(&edge->new_channel_event); ret = qcom_smd_parse_edge(&pdev->dev, node, edge); if (ret) diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index 65a64fcdb1aa..bd51c8a9d807 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -56,4 +56,8 @@ void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); +struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_device *sdev, + const char *name, + qcom_smd_cb_t cb); + #endif -- cgit v1.2.3 From b8a7a3a6674725d7ca0ff6e322f6c1cab6e6a11d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:37 +0100 Subject: posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/9p/acl.c | 2 +- fs/btrfs/acl.c | 3 -- fs/ceph/acl.c | 2 + fs/ext2/acl.c | 3 -- fs/ext4/acl.c | 3 -- fs/f2fs/acl.c | 3 -- fs/hfsplus/posix_acl.c | 3 -- fs/inode.c | 4 +- fs/jffs2/acl.c | 2 - fs/jfs/acl.c | 2 - fs/namei.c | 2 +- fs/nfs/nfs3acl.c | 43 +++++++++++++++++++- fs/ocfs2/dlmglue.c | 3 ++ fs/posix_acl.c | 103 ++++++++++++++++++++++++++++++++---------------- fs/reiserfs/xattr_acl.c | 6 +-- fs/xfs/xfs_acl.c | 20 +++------- include/linux/fs.h | 12 ++++++ 17 files changed, 138 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 9da967f38387..2d94e94b6b59 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -93,7 +93,7 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) * instantiating the inode (v9fs_inode_from_fid) */ acl = get_cached_acl(inode, type); - BUG_ON(acl == ACL_NOT_CACHED); + BUG_ON(is_uncached_acl(acl)); return acl; } diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6d263bb1621c..67a607709d4f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -63,9 +63,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) } kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index f19708487e2f..5457f216e2e5 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -37,6 +37,8 @@ static inline void ceph_set_cached_acl(struct inode *inode, spin_lock(&ci->i_ceph_lock); if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)) set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); spin_unlock(&ci->i_ceph_lock); } diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 27695e6f4e46..42f1d1814083 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -172,9 +172,6 @@ ext2_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 69b1e73026a5..c6601a476c02 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -172,9 +172,6 @@ ext4_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index c8f25f7241f0..6f1fdda977b3 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -190,9 +190,6 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c index afb33eda6d7d..ab7ea2506b4d 100644 --- a/fs/hfsplus/posix_acl.c +++ b/fs/hfsplus/posix_acl.c @@ -48,9 +48,6 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/inode.c b/fs/inode.c index 69b8b526c194..4202aac99464 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -238,9 +238,9 @@ void __destroy_inode(struct inode *inode) } #ifdef CONFIG_FS_POSIX_ACL - if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) + if (inode->i_acl && !is_uncached_acl(inode->i_acl)) posix_acl_release(inode->i_acl); - if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) + if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl)) posix_acl_release(inode->i_default_acl); #endif this_cpu_dec(nr_inodes); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 2f7a3c090489..bc2693d56298 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -203,8 +203,6 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type) acl = ERR_PTR(rc); } kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); return acl; } diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index ab4882801b24..21fa92ba2c19 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -63,8 +63,6 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type) acl = posix_acl_from_xattr(&init_user_ns, value, size); } kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); return acl; } diff --git a/fs/namei.c b/fs/namei.c index 794f81dce766..3498d53de26f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -265,7 +265,7 @@ static int check_acl(struct inode *inode, int mask) if (!acl) return -EAGAIN; /* no ->get_acl() calls in RCU mode... */ - if (acl == ACL_NOT_CACHED) + if (is_uncached_acl(acl)) return -ECHILD; return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 17c0fa1eccfa..720d92f5abfb 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -11,6 +11,38 @@ #define NFSDBG_FACILITY NFSDBG_PROC +/* + * nfs3_prepare_get_acl, nfs3_complete_get_acl, nfs3_abort_get_acl: Helpers for + * caching get_acl results in a race-free way. See fs/posix_acl.c:get_acl() + * for explanations. + */ +static void nfs3_prepare_get_acl(struct posix_acl **p) +{ + struct posix_acl *sentinel = uncached_acl_sentinel(current); + + if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) { + /* Not the first reader or sentinel already in place. */ + } +} + +static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl) +{ + struct posix_acl *sentinel = uncached_acl_sentinel(current); + + /* Only cache the ACL if our sentinel is still in place. */ + posix_acl_dup(acl); + if (cmpxchg(p, sentinel, acl) != sentinel) + posix_acl_release(acl); +} + +static void nfs3_abort_get_acl(struct posix_acl **p) +{ + struct posix_acl *sentinel = uncached_acl_sentinel(current); + + /* Remove our sentinel upon failure. */ + cmpxchg(p, sentinel, ACL_NOT_CACHED); +} + struct posix_acl *nfs3_get_acl(struct inode *inode, int type) { struct nfs_server *server = NFS_SERVER(inode); @@ -55,6 +87,11 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) if (res.fattr == NULL) return ERR_PTR(-ENOMEM); + if (args.mask & NFS_ACL) + nfs3_prepare_get_acl(&inode->i_acl); + if (args.mask & NFS_DFACL) + nfs3_prepare_get_acl(&inode->i_default_acl); + status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); @@ -89,12 +126,12 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) } if (res.mask & NFS_ACL) - set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access); + nfs3_complete_get_acl(&inode->i_acl, res.acl_access); else forget_cached_acl(inode, ACL_TYPE_ACCESS); if (res.mask & NFS_DFACL) - set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default); + nfs3_complete_get_acl(&inode->i_default_acl, res.acl_default); else forget_cached_acl(inode, ACL_TYPE_DEFAULT); @@ -108,6 +145,8 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) } getout: + nfs3_abort_get_acl(&inode->i_acl); + nfs3_abort_get_acl(&inode->i_default_acl); posix_acl_release(res.acl_access); posix_acl_release(res.acl_default); nfs_free_fattr(res.fattr); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 474e57f834e6..1eaa9100c889 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -54,6 +54,7 @@ #include "uptodate.h" #include "quota.h" #include "refcounttree.h" +#include "acl.h" #include "buffer_head_io.h" @@ -3623,6 +3624,8 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, filemap_fdatawait(mapping); } + forget_all_cached_acls(inode); + out: return UNBLOCK_CONTINUE; } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 711dd5170376..bc6736d60786 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -37,14 +37,18 @@ EXPORT_SYMBOL(acl_by_type); struct posix_acl *get_cached_acl(struct inode *inode, int type) { struct posix_acl **p = acl_by_type(inode, type); - struct posix_acl *acl = ACCESS_ONCE(*p); - if (acl) { - spin_lock(&inode->i_lock); - acl = *p; - if (acl != ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); + struct posix_acl *acl; + + for (;;) { + rcu_read_lock(); + acl = rcu_dereference(*p); + if (!acl || is_uncached_acl(acl) || + atomic_inc_not_zero(&acl->a_refcount)) + break; + rcu_read_unlock(); + cpu_relax(); } + rcu_read_unlock(); return acl; } EXPORT_SYMBOL(get_cached_acl); @@ -59,58 +63,72 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) { struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *old; - spin_lock(&inode->i_lock); - old = *p; - rcu_assign_pointer(*p, posix_acl_dup(acl)); - spin_unlock(&inode->i_lock); - if (old != ACL_NOT_CACHED) + + old = xchg(p, posix_acl_dup(acl)); + if (!is_uncached_acl(old)) posix_acl_release(old); } EXPORT_SYMBOL(set_cached_acl); -void forget_cached_acl(struct inode *inode, int type) +static void __forget_cached_acl(struct posix_acl **p) { - struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *old; - spin_lock(&inode->i_lock); - old = *p; - *p = ACL_NOT_CACHED; - spin_unlock(&inode->i_lock); - if (old != ACL_NOT_CACHED) + + old = xchg(p, ACL_NOT_CACHED); + if (!is_uncached_acl(old)) posix_acl_release(old); } + +void forget_cached_acl(struct inode *inode, int type) +{ + __forget_cached_acl(acl_by_type(inode, type)); +} EXPORT_SYMBOL(forget_cached_acl); void forget_all_cached_acls(struct inode *inode) { - struct posix_acl *old_access, *old_default; - spin_lock(&inode->i_lock); - old_access = inode->i_acl; - old_default = inode->i_default_acl; - inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; - spin_unlock(&inode->i_lock); - if (old_access != ACL_NOT_CACHED) - posix_acl_release(old_access); - if (old_default != ACL_NOT_CACHED) - posix_acl_release(old_default); + __forget_cached_acl(&inode->i_acl); + __forget_cached_acl(&inode->i_default_acl); } EXPORT_SYMBOL(forget_all_cached_acls); struct posix_acl *get_acl(struct inode *inode, int type) { + void *sentinel; + struct posix_acl **p; struct posix_acl *acl; + /* + * The sentinel is used to detect when another operation like + * set_cached_acl() or forget_cached_acl() races with get_acl(). + * It is guaranteed that is_uncached_acl(sentinel) is true. + */ + acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) + if (!is_uncached_acl(acl)) return acl; if (!IS_POSIXACL(inode)) return NULL; + sentinel = uncached_acl_sentinel(current); + p = acl_by_type(inode, type); + /* - * A filesystem can force a ACL callback by just never filling the - * ACL cache. But normally you'd fill the cache either at inode - * instantiation time, or on the first ->get_acl call. + * If the ACL isn't being read yet, set our sentinel. Otherwise, the + * current value of the ACL will not be ACL_NOT_CACHED and so our own + * sentinel will not be set; another task will update the cache. We + * could wait for that other task to complete its job, but it's easier + * to just call ->get_acl to fetch the ACL ourself. (This is going to + * be an unlikely race.) + */ + if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) + /* fall through */ ; + + /* + * Normally, the ACL returned by ->get_acl will be cached. + * A filesystem can prevent that by calling + * forget_cached_acl(inode, type) in ->get_acl. * * If the filesystem doesn't have a get_acl() function at all, we'll * just create the negative cache entry. @@ -119,7 +137,24 @@ struct posix_acl *get_acl(struct inode *inode, int type) set_cached_acl(inode, type, NULL); return NULL; } - return inode->i_op->get_acl(inode, type); + acl = inode->i_op->get_acl(inode, type); + + if (IS_ERR(acl)) { + /* + * Remove our sentinel so that we don't block future attempts + * to cache the ACL. + */ + cmpxchg(p, sentinel, ACL_NOT_CACHED); + return acl; + } + + /* + * Cache the result, but only if our sentinel is still in place. + */ + posix_acl_dup(acl); + if (unlikely(cmpxchg(p, sentinel, acl) != sentinel)) + posix_acl_release(acl); + return acl; } EXPORT_SYMBOL(get_acl); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index ec74bbedc873..dbed42f755e0 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -197,10 +197,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) size = reiserfs_xattr_get(inode, name, NULL, 0); if (size < 0) { - if (size == -ENODATA || size == -ENOSYS) { - set_cached_acl(inode, type, NULL); + if (size == -ENODATA || size == -ENOSYS) return NULL; - } return ERR_PTR(size); } @@ -220,8 +218,6 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) } else { acl = reiserfs_posix_acl_from_disk(value, retval); } - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); kfree(value); return acl; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 2d5df1f23bbc..b6e527b8eccb 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -158,22 +158,14 @@ xfs_get_acl(struct inode *inode, int type) if (error) { /* * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient and - * leave the cache entry as ACL_NOT_CACHED. + * cache entry, for any other error assume it is transient. */ - if (error == -ENOATTR) - goto out_update_cache; - acl = ERR_PTR(error); - goto out; + if (error != -ENOATTR) + acl = ERR_PTR(error); + } else { + acl = xfs_acl_from_disk(xfs_acl, len, + XFS_ACL_MAX_ENTRIES(ip->i_mount)); } - - acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); - if (IS_ERR(acl)) - goto out; - -out_update_cache: - set_cached_acl(inode, type, acl); -out: kmem_free(xfs_acl); return acl; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..329ed372d708 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -577,6 +577,18 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +static inline struct posix_acl * +uncached_acl_sentinel(struct task_struct *task) +{ + return (void *)task + 1; +} + +static inline bool +is_uncached_acl(struct posix_acl *acl) +{ + return (long)acl & 1; +} + #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 -- cgit v1.2.3 From 04c57f4501909b60353031cfe5b991751d745658 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:38 +0100 Subject: posix_acl: Unexport acl_by_type and make it static acl_by_type(inode, type) returns a pointer to either inode->i_acl or inode->i_default_acl depending on type. This is useful in fs/posix_acl.c, but should never have been visible outside that file. Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/posix_acl.c | 3 +-- include/linux/posix_acl.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/posix_acl.c b/fs/posix_acl.c index bc6736d60786..db1fb0f5d9ff 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -21,7 +21,7 @@ #include #include -struct posix_acl **acl_by_type(struct inode *inode, int type) +static struct posix_acl **acl_by_type(struct inode *inode, int type) { switch (type) { case ACL_TYPE_ACCESS: @@ -32,7 +32,6 @@ struct posix_acl **acl_by_type(struct inode *inode, int type) BUG(); } } -EXPORT_SYMBOL(acl_by_type); struct posix_acl *get_cached_acl(struct inode *inode, int type) { diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 3e96a6a76103..5b5a80cc5926 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -99,7 +99,6 @@ extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); -struct posix_acl **acl_by_type(struct inode *inode, int type); struct posix_acl *get_cached_acl(struct inode *inode, int type); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); -- cgit v1.2.3 From 1879445dfa7bbd6fe21b09c5cc72f4934798afed Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 28 Mar 2016 06:41:30 +0000 Subject: perf/core: Set event's default ::overflow_handler() Set a default event->overflow_handler in perf_event_alloc() so don't need to check event->overflow_handler in __perf_event_overflow(). Following commits can give a different default overflow_handler. Initial idea comes from Peter: http://lkml.kernel.org/r/20130708121557.GA17211@twins.programming.kicks-ass.net Since the default value of event->overflow_handler is not NULL, existing 'if (!overflow_handler)' checks need to be changed. is_default_overflow_handler() is introduced for this. No extra performance overhead is introduced into the hot path because in the original code we still need to read this handler from memory. A conditional branch is avoided so actually we remove some instructions. Signed-off-by: Wang Nan Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Brendan Gregg Cc: He Kuang Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Zefan Li Link: http://lkml.kernel.org/r/1459147292-239310-3-git-send-email-wangnan0@huawei.com Signed-off-by: Ingo Molnar --- arch/arm/kernel/hw_breakpoint.c | 4 ++-- arch/arm64/kernel/hw_breakpoint.c | 4 ++-- include/linux/perf_event.h | 6 ++++++ kernel/events/core.c | 14 ++++++++------ 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 6284779d64ee..b8df45883cf7 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) info->address &= ~alignment_mask; info->ctrl.len <<= offset; - if (!bp->overflow_handler) { + if (is_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) + if (is_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); unlock: diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b45c95d34b83..4ef5373f9a76 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (!bp->overflow_handler) + if (is_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ - if (!wp->overflow_handler) + if (is_default_overflow_handler(wp)) step = 1; unlock: diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 15588d4c581d..4065ca2d7149 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -838,6 +838,12 @@ extern void perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool +is_default_overflow_handler(struct perf_event *event) +{ + return (event->overflow_handler == perf_event_output); +} + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, diff --git a/kernel/events/core.c b/kernel/events/core.c index 51386e84293e..8c3b35f2a269 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6628,10 +6628,7 @@ static int __perf_event_overflow(struct perf_event *event, irq_work_queue(&event->pending); } - if (event->overflow_handler) - event->overflow_handler(event, data, regs); - else - perf_event_output(event, data, regs); + event->overflow_handler(event, data, regs); if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; @@ -8152,8 +8149,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, context = parent_event->overflow_handler_context; } - event->overflow_handler = overflow_handler; - event->overflow_handler_context = context; + if (overflow_handler) { + event->overflow_handler = overflow_handler; + event->overflow_handler_context = context; + } else { + event->overflow_handler = perf_event_output; + event->overflow_handler_context = NULL; + } perf_event__state_init(event); -- cgit v1.2.3 From 274529ba9bda86c91c2c06da3a641aaf617dd30f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 Mar 2016 19:46:04 -0700 Subject: rcu: Consolidate dumping of ftrace buffer This commit consolidates a couple definitions and several calls for single-shot ftrace-buffer dumping. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 13 +++++++++++++ kernel/rcu/rcutorture.c | 17 +++-------------- kernel/rcu/tree.c | 4 ++-- 3 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2657aff2725b..45de591657a6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1144,4 +1144,17 @@ static inline void rcu_sysidle_force_exit(void) #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ +/* + * Dump the ftrace buffer, but only one time per callsite per boot. + */ +#define rcu_ftrace_dump(oops_dump_mode) \ +do { \ + static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \ + \ + if (!atomic_read(&___rfd_beenhere) && \ + !atomic_xchg(&___rfd_beenhere, 1)) \ + ftrace_dump(oops_dump_mode); \ +} while (0) + + #endif /* __LINUX_RCUPDATE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 250ea67c1615..463867c43221 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1082,17 +1082,6 @@ rcu_torture_fakewriter(void *arg) return 0; } -static void rcutorture_trace_dump(void) -{ - static atomic_t beenhere = ATOMIC_INIT(0); - - if (atomic_read(&beenhere)) - return; - if (atomic_xchg(&beenhere, 1) != 0) - return; - ftrace_dump(DUMP_ALL); -} - /* * RCU torture reader from timer handler. Dereferences rcu_torture_current, * incrementing the corresponding element of the pipeline array. The @@ -1142,7 +1131,7 @@ static void rcu_torture_timer(unsigned long unused) if (pipe_count > 1) { do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, started, completed); - rcutorture_trace_dump(); + rcu_ftrace_dump(DUMP_ALL); } __this_cpu_inc(rcu_torture_count[pipe_count]); completed = completed - started; @@ -1215,7 +1204,7 @@ rcu_torture_reader(void *arg) if (pipe_count > 1) { do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, started, completed); - rcutorture_trace_dump(); + rcu_ftrace_dump(DUMP_ALL); } __this_cpu_inc(rcu_torture_count[pipe_count]); completed = completed - started; @@ -1333,7 +1322,7 @@ rcu_torture_stats_print(void) rcu_torture_writer_state, gpnum, completed, flags); show_rcu_gp_kthreads(); - rcutorture_trace_dump(); + rcu_ftrace_dump(DUMP_ALL); } rtcv_snap = rcu_torture_current_version; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9a535a86e732..531a328076bd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -637,7 +637,7 @@ static void rcu_eqs_enter_common(long long oldval, bool user) idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0); - ftrace_dump(DUMP_ORIG); + rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ @@ -799,7 +799,7 @@ static void rcu_eqs_exit_common(long long oldval, int user) trace_rcu_dyntick(TPS("Error on exit: not idle task"), oldval, rdtp->dynticks_nesting); - ftrace_dump(DUMP_ORIG); + rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ -- cgit v1.2.3 From 293e2421fe25839500207eda123cc4475f8d17b8 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 23 Mar 2016 23:11:48 +0800 Subject: rcu: Remove superfluous versions of rcu_read_lock_sched_held() Currently, we have four versions of rcu_read_lock_sched_held(), depending on the combined choices on PREEMPT_COUNT and DEBUG_LOCK_ALLOC. However, there is an existing function preemptible() that already distinguishes between the PREEMPT_COUNT=y and PREEMPT_COUNT=n cases, and allows these four implementations to be consolidated down to two. This commit therefore uses preemptible() to achieve this consolidation. Note that there could be a small performance regression in the case of CONFIG_DEBUG_LOCK_ALLOC=y && PREEMPT_COUNT=n. However, given the overhead associated with CONFIG_DEBUG_LOCK_ALLOC=y, this should be down in the noise. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 17 +---------------- kernel/rcu/update.c | 4 ++-- 2 files changed, 3 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 45de591657a6..5f1533e3d032 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -508,14 +508,7 @@ int rcu_read_lock_bh_held(void); * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. */ -#ifdef CONFIG_PREEMPT_COUNT int rcu_read_lock_sched_held(void); -#else /* #ifdef CONFIG_PREEMPT_COUNT */ -static inline int rcu_read_lock_sched_held(void) -{ - return 1; -} -#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -532,18 +525,10 @@ static inline int rcu_read_lock_bh_held(void) return 1; } -#ifdef CONFIG_PREEMPT_COUNT static inline int rcu_read_lock_sched_held(void) { - return preempt_count() != 0 || irqs_disabled(); + return !preemptible(); } -#else /* #ifdef CONFIG_PREEMPT_COUNT */ -static inline int rcu_read_lock_sched_held(void) -{ - return 1; -} -#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ - #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index ca828b41c938..3ccdc8eebc5a 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -67,7 +67,7 @@ static int rcu_normal_after_boot; module_param(rcu_normal_after_boot, int, 0); #endif /* #ifndef CONFIG_TINY_RCU */ -#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT) +#ifdef CONFIG_DEBUG_LOCK_ALLOC /** * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * @@ -111,7 +111,7 @@ int rcu_read_lock_sched_held(void) return 0; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); + return lockdep_opinion || !preemptible(); } EXPORT_SYMBOL(rcu_read_lock_sched_held); #endif -- cgit v1.2.3 From 291783b8ad77a83a6fdf91d55eee7f1ad72ed4d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jan 2016 13:43:30 -0800 Subject: rcutorture: Expedited-GP batch progress access to torturing This commit provides rcu_exp_batches_completed() and rcu_exp_batches_completed_sched() functions to allow torture-test modules to check how many expedited grace period batches have completed. These are analogous to the existing rcu_batches_completed(), rcu_batches_completed_bh(), and rcu_batches_completed_sched() functions. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 16 ++++++++++++++++ include/linux/rcutree.h | 2 ++ kernel/rcu/tree.c | 22 ++++++++++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 64809aea661c..93aea75029fb 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -149,6 +149,22 @@ static inline unsigned long rcu_batches_completed_sched(void) return 0; } +/* + * Return the number of expedited grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of expedited sched grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed_sched(void) +{ + return 0; +} + static inline void rcu_force_quiescent_state(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index ad1eda9fa4da..5043cb823fb2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -87,6 +87,8 @@ unsigned long rcu_batches_started_sched(void); unsigned long rcu_batches_completed(void); unsigned long rcu_batches_completed_bh(void); unsigned long rcu_batches_completed_sched(void); +unsigned long rcu_exp_batches_completed(void); +unsigned long rcu_exp_batches_completed_sched(void); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 531a328076bd..88df64087dfe 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -459,6 +459,28 @@ unsigned long rcu_batches_completed_bh(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +/* + * Return the number of RCU expedited batches completed thus far for + * debug & stats. Odd numbers mean that a batch is in progress, even + * numbers mean idle. The value returned will thus be roughly double + * the cumulative batches since boot. + */ +unsigned long rcu_exp_batches_completed(void) +{ + return rcu_state_p->expedited_sequence; +} +EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); + +/* + * Return the number of RCU-sched expedited batches completed thus far + * for debug & stats. Similar to rcu_exp_batches_completed(). + */ +unsigned long rcu_exp_batches_completed_sched(void) +{ + return rcu_sched_state.expedited_sequence; +} +EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); + /* * Force a quiescent state. */ -- cgit v1.2.3 From d18d12d0ff07c47fb913f297c174f30a3f96042d Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 31 Mar 2016 15:51:32 +0200 Subject: lib/proportions: Remove unused code By accident I stumbled across code that is no longer used. According to git grep, the global functions in lib/proportions.c are not used anywhere. This patch removes the old, unused code. Peter Zijlstra further commented: "Ah indeed, that got replaced with the flex proportion code a while back." Signed-off-by: Richard Cochran Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4265b49bed713fbe3faaf8c05da0e1792f09c0b3.1459432020.git.rcochran@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/proportions.h | 137 --------------- include/linux/sched.h | 1 - lib/Makefile | 2 +- lib/proportions.c | 407 -------------------------------------------- 4 files changed, 1 insertion(+), 546 deletions(-) delete mode 100644 include/linux/proportions.h delete mode 100644 lib/proportions.c (limited to 'include/linux') diff --git a/include/linux/proportions.h b/include/linux/proportions.h deleted file mode 100644 index 21221338ad18..000000000000 --- a/include/linux/proportions.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * FLoating proportions - * - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra - * - * This file contains the public data structure and API definitions. - */ - -#ifndef _LINUX_PROPORTIONS_H -#define _LINUX_PROPORTIONS_H - -#include -#include -#include -#include - -struct prop_global { - /* - * The period over which we differentiate - * - * period = 2^shift - */ - int shift; - /* - * The total event counter aka 'time'. - * - * Treated as an unsigned long; the lower 'shift - 1' bits are the - * counter bits, the remaining upper bits the period counter. - */ - struct percpu_counter events; -}; - -/* - * global proportion descriptor - * - * this is needed to consistently flip prop_global structures. - */ -struct prop_descriptor { - int index; - struct prop_global pg[2]; - struct mutex mutex; /* serialize the prop_global switch */ -}; - -int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); -void prop_change_shift(struct prop_descriptor *pd, int new_shift); - -/* - * ----- PERCPU ------ - */ - -struct prop_local_percpu { - /* - * the local events counter - */ - struct percpu_counter events; - - /* - * snapshot of the last seen global state - */ - int shift; - unsigned long period; - raw_spinlock_t lock; /* protect the snapshot state */ -}; - -int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); -void prop_local_destroy_percpu(struct prop_local_percpu *pl); -void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); -void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, - long *numerator, long *denominator); - -static inline -void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __prop_inc_percpu(pd, pl); - local_irq_restore(flags); -} - -/* - * Limit the time part in order to ensure there are some bits left for the - * cycle counter and fraction multiply. - */ -#if BITS_PER_LONG == 32 -#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) -#else -#define PROP_MAX_SHIFT (BITS_PER_LONG/2) -#endif - -#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) -#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) - -void __prop_inc_percpu_max(struct prop_descriptor *pd, - struct prop_local_percpu *pl, long frac); - - -/* - * ----- SINGLE ------ - */ - -struct prop_local_single { - /* - * the local events counter - */ - unsigned long events; - - /* - * snapshot of the last seen global state - * and a lock protecting this state - */ - unsigned long period; - int shift; - raw_spinlock_t lock; /* protect the snapshot state */ -}; - -#define INIT_PROP_LOCAL_SINGLE(name) \ -{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -} - -int prop_local_init_single(struct prop_local_single *pl); -void prop_local_destroy_single(struct prop_local_single *pl); -void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl); -void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl, - long *numerator, long *denominator); - -static inline -void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __prop_inc_single(pd, pl); - local_irq_restore(flags); -} - -#endif /* _LINUX_PROPORTIONS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..6dd25d1869b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -40,7 +40,6 @@ struct sched_param { #include #include #include -#include #include #include #include diff --git a/lib/Makefile b/lib/Makefile index 7bd6fd436c97..a65e9a861535 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,7 +23,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ sha1.o md5.o irq_regs.o argv_split.o \ - proportions.o flex_proportions.o ratelimit.o show_mem.o \ + flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o nmi_backtrace.o diff --git a/lib/proportions.c b/lib/proportions.c deleted file mode 100644 index efa54f259ea9..000000000000 --- a/lib/proportions.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Floating proportions - * - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra - * - * Description: - * - * The floating proportion is a time derivative with an exponentially decaying - * history: - * - * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) - * - * Where j is an element from {prop_local}, x_{j} is j's number of events, - * and i the time period over which the differential is taken. So d/dt_{-i} is - * the differential over the i-th last period. - * - * The decaying history gives smooth transitions. The time differential carries - * the notion of speed. - * - * The denominator is 2^(1+i) because we want the series to be normalised, ie. - * - * \Sum_{i=0} 1/2^(1+i) = 1 - * - * Further more, if we measure time (t) in the same events as x; so that: - * - * t = \Sum_{j} x_{j} - * - * we get that: - * - * \Sum_{j} p_{j} = 1 - * - * Writing this in an iterative fashion we get (dropping the 'd's): - * - * if (++x_{j}, ++t > period) - * t /= 2; - * for_each (j) - * x_{j} /= 2; - * - * so that: - * - * p_{j} = x_{j} / t; - * - * We optimize away the '/= 2' for the global time delta by noting that: - * - * if (++t > period) t /= 2: - * - * Can be approximated by: - * - * period/2 + (++t % period/2) - * - * [ Furthermore, when we choose period to be 2^n it can be written in terms of - * binary operations and wraparound artefacts disappear. ] - * - * Also note that this yields a natural counter of the elapsed periods: - * - * c = t / (period/2) - * - * [ Its monotonic increasing property can be applied to mitigate the wrap- - * around issue. ] - * - * This allows us to do away with the loop over all prop_locals on each period - * expiration. By remembering the period count under which it was last accessed - * as c_{j}, we can obtain the number of 'missed' cycles from: - * - * c - c_{j} - * - * We can then lazily catch up to the global period count every time we are - * going to use x_{j}, by doing: - * - * x_{j} /= 2^(c - c_{j}), c_{j} = c - */ - -#include -#include - -int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) -{ - int err; - - if (shift > PROP_MAX_SHIFT) - shift = PROP_MAX_SHIFT; - - pd->index = 0; - pd->pg[0].shift = shift; - mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0, gfp); - if (err) - goto out; - - err = percpu_counter_init(&pd->pg[1].events, 0, gfp); - if (err) - percpu_counter_destroy(&pd->pg[0].events); - -out: - return err; -} - -/* - * We have two copies, and flip between them to make it seem like an atomic - * update. The update is not really atomic wrt the events counter, but - * it is internally consistent with the bit layout depending on shift. - * - * We copy the events count, move the bits around and flip the index. - */ -void prop_change_shift(struct prop_descriptor *pd, int shift) -{ - int index; - int offset; - u64 events; - unsigned long flags; - - if (shift > PROP_MAX_SHIFT) - shift = PROP_MAX_SHIFT; - - mutex_lock(&pd->mutex); - - index = pd->index ^ 1; - offset = pd->pg[pd->index].shift - shift; - if (!offset) - goto out; - - pd->pg[index].shift = shift; - - local_irq_save(flags); - events = percpu_counter_sum(&pd->pg[pd->index].events); - if (offset < 0) - events <<= -offset; - else - events >>= offset; - percpu_counter_set(&pd->pg[index].events, events); - - /* - * ensure the new pg is fully written before the switch - */ - smp_wmb(); - pd->index = index; - local_irq_restore(flags); - - synchronize_rcu(); - -out: - mutex_unlock(&pd->mutex); -} - -/* - * wrap the access to the data in an rcu_read_lock() section; - * this is used to track the active references. - */ -static struct prop_global *prop_get_global(struct prop_descriptor *pd) -__acquires(RCU) -{ - int index; - - rcu_read_lock(); - index = pd->index; - /* - * match the wmb from vcd_flip() - */ - smp_rmb(); - return &pd->pg[index]; -} - -static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) -__releases(RCU) -{ - rcu_read_unlock(); -} - -static void -prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) -{ - int offset = *pl_shift - new_shift; - - if (!offset) - return; - - if (offset < 0) - *pl_period <<= -offset; - else - *pl_period >>= offset; - - *pl_shift = new_shift; -} - -/* - * PERCPU - */ - -#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) - -int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) -{ - raw_spin_lock_init(&pl->lock); - pl->shift = 0; - pl->period = 0; - return percpu_counter_init(&pl->events, 0, gfp); -} - -void prop_local_destroy_percpu(struct prop_local_percpu *pl) -{ - percpu_counter_destroy(&pl->events); -} - -/* - * Catch up with missed period expirations. - * - * until (c_{j} == c) - * x_{j} -= x_{j}/2; - * c_{j}++; - */ -static -void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) -{ - unsigned long period = 1UL << (pg->shift - 1); - unsigned long period_mask = ~(period - 1); - unsigned long global_period; - unsigned long flags; - - global_period = percpu_counter_read(&pg->events); - global_period &= period_mask; - - /* - * Fast path - check if the local and global period count still match - * outside of the lock. - */ - if (pl->period == global_period) - return; - - raw_spin_lock_irqsave(&pl->lock, flags); - prop_adjust_shift(&pl->shift, &pl->period, pg->shift); - - /* - * For each missed period, we half the local counter. - * basically: - * pl->events >> (global_period - pl->period); - */ - period = (global_period - pl->period) >> (pg->shift - 1); - if (period < BITS_PER_LONG) { - s64 val = percpu_counter_read(&pl->events); - - if (val < (nr_cpu_ids * PROP_BATCH)) - val = percpu_counter_sum(&pl->events); - - __percpu_counter_add(&pl->events, -val + (val >> period), - PROP_BATCH); - } else - percpu_counter_set(&pl->events, 0); - - pl->period = global_period; - raw_spin_unlock_irqrestore(&pl->lock, flags); -} - -/* - * ++x_{j}, ++t - */ -void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) -{ - struct prop_global *pg = prop_get_global(pd); - - prop_norm_percpu(pg, pl); - __percpu_counter_add(&pl->events, 1, PROP_BATCH); - percpu_counter_add(&pg->events, 1); - prop_put_global(pd, pg); -} - -/* - * identical to __prop_inc_percpu, except that it limits this pl's fraction to - * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. - */ -void __prop_inc_percpu_max(struct prop_descriptor *pd, - struct prop_local_percpu *pl, long frac) -{ - struct prop_global *pg = prop_get_global(pd); - - prop_norm_percpu(pg, pl); - - if (unlikely(frac != PROP_FRAC_BASE)) { - unsigned long period_2 = 1UL << (pg->shift - 1); - unsigned long counter_mask = period_2 - 1; - unsigned long global_count; - long numerator, denominator; - - numerator = percpu_counter_read_positive(&pl->events); - global_count = percpu_counter_read(&pg->events); - denominator = period_2 + (global_count & counter_mask); - - if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) - goto out_put; - } - - percpu_counter_add(&pl->events, 1); - percpu_counter_add(&pg->events, 1); - -out_put: - prop_put_global(pd, pg); -} - -/* - * Obtain a fraction of this proportion - * - * p_{j} = x_{j} / (period/2 + t % period/2) - */ -void prop_fraction_percpu(struct prop_descriptor *pd, - struct prop_local_percpu *pl, - long *numerator, long *denominator) -{ - struct prop_global *pg = prop_get_global(pd); - unsigned long period_2 = 1UL << (pg->shift - 1); - unsigned long counter_mask = period_2 - 1; - unsigned long global_count; - - prop_norm_percpu(pg, pl); - *numerator = percpu_counter_read_positive(&pl->events); - - global_count = percpu_counter_read(&pg->events); - *denominator = period_2 + (global_count & counter_mask); - - prop_put_global(pd, pg); -} - -/* - * SINGLE - */ - -int prop_local_init_single(struct prop_local_single *pl) -{ - raw_spin_lock_init(&pl->lock); - pl->shift = 0; - pl->period = 0; - pl->events = 0; - return 0; -} - -void prop_local_destroy_single(struct prop_local_single *pl) -{ -} - -/* - * Catch up with missed period expirations. - */ -static -void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) -{ - unsigned long period = 1UL << (pg->shift - 1); - unsigned long period_mask = ~(period - 1); - unsigned long global_period; - unsigned long flags; - - global_period = percpu_counter_read(&pg->events); - global_period &= period_mask; - - /* - * Fast path - check if the local and global period count still match - * outside of the lock. - */ - if (pl->period == global_period) - return; - - raw_spin_lock_irqsave(&pl->lock, flags); - prop_adjust_shift(&pl->shift, &pl->period, pg->shift); - /* - * For each missed period, we half the local counter. - */ - period = (global_period - pl->period) >> (pg->shift - 1); - if (likely(period < BITS_PER_LONG)) - pl->events >>= period; - else - pl->events = 0; - pl->period = global_period; - raw_spin_unlock_irqrestore(&pl->lock, flags); -} - -/* - * ++x_{j}, ++t - */ -void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) -{ - struct prop_global *pg = prop_get_global(pd); - - prop_norm_single(pg, pl); - pl->events++; - percpu_counter_add(&pg->events, 1); - prop_put_global(pd, pg); -} - -/* - * Obtain a fraction of this proportion - * - * p_{j} = x_{j} / (period/2 + t % period/2) - */ -void prop_fraction_single(struct prop_descriptor *pd, - struct prop_local_single *pl, - long *numerator, long *denominator) -{ - struct prop_global *pg = prop_get_global(pd); - unsigned long period_2 = 1UL << (pg->shift - 1); - unsigned long counter_mask = period_2 - 1; - unsigned long global_count; - - prop_norm_single(pg, pl); - *numerator = pl->events; - - global_count = percpu_counter_read(&pg->events); - *denominator = period_2 + (global_count & counter_mask); - - prop_put_global(pd, pg); -} -- cgit v1.2.3 From 1ce15ef4f60529cf1313f80f4338c88bd65cc572 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:16 -0400 Subject: module: preserve Elf information for livepatch modules For livepatch modules, copy Elf section, symbol, and string information from the load_info struct in the module loader. Persist copies of the original symbol table and string table. Livepatch manages its own relocation sections in order to reuse module loader code to write relocations. Livepatch modules must preserve Elf information such as section indices in order to apply livepatch relocation sections using the module loader's apply_relocate_add() function. In order to apply livepatch relocation sections, livepatch modules must keep a complete copy of their original symbol table in memory. Normally, a stripped down copy of a module's symbol table (containing only "core" symbols) is made available through module->core_symtab. But for livepatch modules, the symbol table copied into memory on module load must be exactly the same as the symbol table produced when the patch module was compiled. This is because the relocations in each livepatch relocation section refer to their respective symbols with their symbol indices, and the original symbol indices (and thus the symtab ordering) must be preserved in order for apply_relocate_add() to find the right symbol. Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Rusty Russell Reviewed-by: Rusty Russell Signed-off-by: Jiri Kosina --- include/linux/module.h | 25 ++++++++++ kernel/module.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 147 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 2bb0c3085706..3daf2b3a09d2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -330,6 +330,15 @@ struct mod_kallsyms { char *strtab; }; +#ifdef CONFIG_LIVEPATCH +struct klp_modinfo { + Elf_Ehdr hdr; + Elf_Shdr *sechdrs; + char *secstrings; + unsigned int symndx; +}; +#endif + struct module { enum module_state state; @@ -456,7 +465,11 @@ struct module { #endif #ifdef CONFIG_LIVEPATCH + bool klp; /* Is this a livepatch module? */ bool klp_alive; + + /* Elf information */ + struct klp_modinfo *klp_info; #endif #ifdef CONFIG_MODULE_UNLOAD @@ -630,6 +643,18 @@ static inline bool module_requested_async_probing(struct module *module) return module && module->async_probe_requested; } +#ifdef CONFIG_LIVEPATCH +static inline bool is_livepatch_module(struct module *mod) +{ + return mod->klp; +} +#else /* !CONFIG_LIVEPATCH */ +static inline bool is_livepatch_module(struct module *mod) +{ + return false; +} +#endif /* CONFIG_LIVEPATCH */ + #else /* !CONFIG_MODULES... */ /* Given an address, look for it in the exception tables. */ diff --git a/kernel/module.c b/kernel/module.c index 041200ca4a2d..5f71aa63ed2a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1973,6 +1973,83 @@ static void module_enable_nx(const struct module *mod) { } static void module_disable_nx(const struct module *mod) { } #endif +#ifdef CONFIG_LIVEPATCH +/* + * Persist Elf information about a module. Copy the Elf header, + * section header table, section string table, and symtab section + * index from info to mod->klp_info. + */ +static int copy_module_elf(struct module *mod, struct load_info *info) +{ + unsigned int size, symndx; + int ret; + + size = sizeof(*mod->klp_info); + mod->klp_info = kmalloc(size, GFP_KERNEL); + if (mod->klp_info == NULL) + return -ENOMEM; + + /* Elf header */ + size = sizeof(mod->klp_info->hdr); + memcpy(&mod->klp_info->hdr, info->hdr, size); + + /* Elf section header table */ + size = sizeof(*info->sechdrs) * info->hdr->e_shnum; + mod->klp_info->sechdrs = kmalloc(size, GFP_KERNEL); + if (mod->klp_info->sechdrs == NULL) { + ret = -ENOMEM; + goto free_info; + } + memcpy(mod->klp_info->sechdrs, info->sechdrs, size); + + /* Elf section name string table */ + size = info->sechdrs[info->hdr->e_shstrndx].sh_size; + mod->klp_info->secstrings = kmalloc(size, GFP_KERNEL); + if (mod->klp_info->secstrings == NULL) { + ret = -ENOMEM; + goto free_sechdrs; + } + memcpy(mod->klp_info->secstrings, info->secstrings, size); + + /* Elf symbol section index */ + symndx = info->index.sym; + mod->klp_info->symndx = symndx; + + /* + * For livepatch modules, core_kallsyms.symtab is a complete + * copy of the original symbol table. Adjust sh_addr to point + * to core_kallsyms.symtab since the copy of the symtab in module + * init memory is freed at the end of do_init_module(). + */ + mod->klp_info->sechdrs[symndx].sh_addr = \ + (unsigned long) mod->core_kallsyms.symtab; + + return 0; + +free_sechdrs: + kfree(mod->klp_info->sechdrs); +free_info: + kfree(mod->klp_info); + return ret; +} + +static void free_module_elf(struct module *mod) +{ + kfree(mod->klp_info->sechdrs); + kfree(mod->klp_info->secstrings); + kfree(mod->klp_info); +} +#else /* !CONFIG_LIVEPATCH */ +static int copy_module_elf(struct module *mod, struct load_info *info) +{ + return 0; +} + +static void free_module_elf(struct module *mod) +{ +} +#endif /* CONFIG_LIVEPATCH */ + void __weak module_memfree(void *module_region) { vfree(module_region); @@ -2011,6 +2088,9 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); + if (is_livepatch_module(mod)) + free_module_elf(mod); + /* Now we can delete it from the lists */ mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ @@ -2126,6 +2206,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) (long)sym[i].st_value); break; + case SHN_LIVEPATCH: + /* Livepatch symbols are resolved by livepatch */ + break; + case SHN_UNDEF: ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ @@ -2174,6 +2258,10 @@ static int apply_relocations(struct module *mod, const struct load_info *info) if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) continue; + /* Livepatch relocation sections are applied by livepatch */ + if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) + continue; + if (info->sechdrs[i].sh_type == SHT_REL) err = apply_relocate(info->sechdrs, info->strtab, info->index.sym, i, mod); @@ -2469,7 +2557,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { - if (i == 0 || + if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name])+1; @@ -2528,7 +2616,7 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs; src = mod->kallsyms->symtab; for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { - if (i == 0 || + if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { dst[ndst] = src[i]; @@ -2667,6 +2755,26 @@ static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned l return 0; } +#ifdef CONFIG_LIVEPATCH +static int find_livepatch_modinfo(struct module *mod, struct load_info *info) +{ + mod->klp = get_modinfo(info, "livepatch") ? true : false; + + return 0; +} +#else /* !CONFIG_LIVEPATCH */ +static int find_livepatch_modinfo(struct module *mod, struct load_info *info) +{ + if (get_modinfo(info, "livepatch")) { + pr_err("%s: module is marked as livepatch module, but livepatch support is disabled", + mod->name); + return -ENOEXEC; + } + + return 0; +} +#endif /* CONFIG_LIVEPATCH */ + /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -2821,6 +2929,10 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) "is unknown, you have been warned.\n", mod->name); } + err = find_livepatch_modinfo(mod, info); + if (err) + return err; + /* Set up license info based on the info section */ set_license(mod, get_modinfo(info, "license")); @@ -3494,6 +3606,12 @@ static int load_module(struct load_info *info, const char __user *uargs, if (err < 0) goto coming_cleanup; + if (is_livepatch_module(mod)) { + err = copy_module_elf(mod, info); + if (err < 0) + goto sysfs_cleanup; + } + /* Get rid of temporary copy. */ free_copy(info); @@ -3502,11 +3620,12 @@ static int load_module(struct load_info *info, const char __user *uargs, return do_init_module(mod); + sysfs_cleanup: + mod_sysfs_teardown(mod); coming_cleanup: blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); klp_module_going(mod); - bug_cleanup: /* module_bug_cleanup needs module_mutex protection */ mutex_lock(&module_mutex); -- cgit v1.2.3 From 425595a7fc2096ab46c741b5ed5372c5ab5bbeac Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:18 -0400 Subject: livepatch: reuse module loader code to write relocations Reuse module loader code to write relocations, thereby eliminating the need for architecture specific relocation code in livepatch. Specifically, reuse the apply_relocate_add() function in the module loader to write relocations instead of duplicating functionality in livepatch's arch-dependent klp_write_module_reloc() function. In order to accomplish this, livepatch modules manage their own relocation sections (marked with the SHF_RELA_LIVEPATCH section flag) and livepatch-specific symbols (marked with SHN_LIVEPATCH symbol section index). To apply livepatch relocation sections, livepatch symbols referenced by relocs are resolved and then apply_relocate_add() is called to apply those relocations. In addition, remove x86 livepatch relocation code and the s390 klp_write_module_reloc() function stub. They are no longer needed since relocation work has been offloaded to module loader. Lastly, mark the module as a livepatch module so that the module loader canappropriately identify and initialize it. Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Heiko Carstens # for s390 changes Signed-off-by: Jiri Kosina --- arch/s390/include/asm/livepatch.h | 7 -- arch/x86/include/asm/livepatch.h | 2 - arch/x86/kernel/Makefile | 1 - arch/x86/kernel/livepatch.c | 70 ----------------- include/linux/livepatch.h | 20 ----- kernel/livepatch/core.c | 148 ++++++++++++++++++++++------------- samples/livepatch/livepatch-sample.c | 1 + 7 files changed, 95 insertions(+), 154 deletions(-) delete mode 100644 arch/x86/kernel/livepatch.c (limited to 'include/linux') diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index d5427c78b1b3..2c1213785892 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -24,13 +24,6 @@ static inline int klp_check_compiler_support(void) return 0; } -static inline int klp_write_module_reloc(struct module *mod, unsigned long - type, unsigned long loc, unsigned long value) -{ - /* not supported yet */ - return -ENOSYS; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 7e68f9558552..a7f9181f63f3 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -32,8 +32,6 @@ static inline int klp_check_compiler_support(void) #endif return 0; } -int klp_write_module_reloc(struct module *mod, unsigned long type, - unsigned long loc, unsigned long value); static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1b78ffe01d0..c5e9a5cf976b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -67,7 +67,6 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-y += apic/ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_LIVEPATCH) += livepatch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c deleted file mode 100644 index 92fc1a51f994..000000000000 --- a/arch/x86/kernel/livepatch.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * livepatch.c - x86-specific Kernel Live Patching Core - * - * Copyright (C) 2014 Seth Jennings - * Copyright (C) 2014 SUSE - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include - -/** - * klp_write_module_reloc() - write a relocation in a module - * @mod: module in which the section to be modified is found - * @type: ELF relocation type (see asm/elf.h) - * @loc: address that the relocation should be written to - * @value: relocation value (sym address + addend) - * - * This function writes a relocation to the specified location for - * a particular module. - */ -int klp_write_module_reloc(struct module *mod, unsigned long type, - unsigned long loc, unsigned long value) -{ - size_t size = 4; - unsigned long val; - unsigned long core = (unsigned long)mod->core_layout.base; - unsigned long core_size = mod->core_layout.size; - - switch (type) { - case R_X86_64_NONE: - return 0; - case R_X86_64_64: - val = value; - size = 8; - break; - case R_X86_64_32: - val = (u32)value; - break; - case R_X86_64_32S: - val = (s32)value; - break; - case R_X86_64_PC32: - val = (u32)(value - loc); - break; - default: - /* unsupported relocation type */ - return -EINVAL; - } - - if (loc < core || loc >= core + core_size) - /* loc does not point to any symbol inside the module */ - return -EINVAL; - - return probe_kernel_write((void *)loc, &val, size); -} diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index bd830d590465..0933ca47791c 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -64,28 +64,9 @@ struct klp_func { struct list_head stack_node; }; -/** - * struct klp_reloc - relocation structure for live patching - * @loc: address where the relocation will be written - * @sympos: position in kallsyms to disambiguate symbols (optional) - * @type: ELF relocation type - * @name: name of the referenced symbol (for lookup/verification) - * @addend: offset from the referenced symbol - * @external: symbol is either exported or within the live patch module itself - */ -struct klp_reloc { - unsigned long loc; - unsigned long sympos; - unsigned long type; - const char *name; - int addend; - int external; -}; - /** * struct klp_object - kernel object structure for live patching * @name: module name (or NULL for vmlinux) - * @relocs: relocation entries to be applied at load time * @funcs: function entries for functions to be patched in the object * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object @@ -95,7 +76,6 @@ struct klp_reloc { struct klp_object { /* external */ const char *name; - struct klp_reloc *relocs; struct klp_func *funcs; /* internal */ diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index d68fbf63b083..eb5db6e837aa 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include /** @@ -204,75 +206,109 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -/* - * external symbols are located outside the parent object (where the parent - * object is either vmlinux or the kmod being patched). - */ -static int klp_find_external_symbol(struct module *pmod, const char *name, - unsigned long *addr) +static int klp_resolve_symbols(Elf_Shdr *relasec, struct module *pmod) { - const struct kernel_symbol *sym; - - /* first, check if it's an exported symbol */ - preempt_disable(); - sym = find_symbol(name, NULL, NULL, true, true); - if (sym) { - *addr = sym->value; - preempt_enable(); - return 0; - } - preempt_enable(); + int i, cnt, vmlinux, ret; + char objname[MODULE_NAME_LEN]; + char symname[KSYM_NAME_LEN]; + char *strtab = pmod->core_kallsyms.strtab; + Elf_Rela *relas; + Elf_Sym *sym; + unsigned long sympos, addr; /* - * Check if it's in another .o within the patch module. This also - * checks that the external symbol is unique. + * Since the field widths for objname and symname in the sscanf() + * call are hard-coded and correspond to MODULE_NAME_LEN and + * KSYM_NAME_LEN respectively, we must make sure that MODULE_NAME_LEN + * and KSYM_NAME_LEN have the values we expect them to have. + * + * Because the value of MODULE_NAME_LEN can differ among architectures, + * we use the smallest/strictest upper bound possible (56, based on + * the current definition of MODULE_NAME_LEN) to prevent overflows. */ - return klp_find_object_symbol(pmod->name, name, 0, addr); + BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128); + + relas = (Elf_Rela *) relasec->sh_addr; + /* For each rela in this klp relocation section */ + for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { + sym = pmod->core_kallsyms.symtab + ELF_R_SYM(relas[i].r_info); + if (sym->st_shndx != SHN_LIVEPATCH) { + pr_err("symbol %s is not marked as a livepatch symbol", + strtab + sym->st_name); + return -EINVAL; + } + + /* Format: .klp.sym.objname.symname,sympos */ + cnt = sscanf(strtab + sym->st_name, + ".klp.sym.%55[^.].%127[^,],%lu", + objname, symname, &sympos); + if (cnt != 3) { + pr_err("symbol %s has an incorrectly formatted name", + strtab + sym->st_name); + return -EINVAL; + } + + /* klp_find_object_symbol() treats a NULL objname as vmlinux */ + vmlinux = !strcmp(objname, "vmlinux"); + ret = klp_find_object_symbol(vmlinux ? NULL : objname, + symname, sympos, &addr); + if (ret) + return ret; + + sym->st_value = addr; + } + + return 0; } static int klp_write_object_relocations(struct module *pmod, struct klp_object *obj) { - int ret = 0; - unsigned long val; - struct klp_reloc *reloc; + int i, cnt, ret = 0; + const char *objname, *secname; + char sec_objname[MODULE_NAME_LEN]; + Elf_Shdr *sec; if (WARN_ON(!klp_is_object_loaded(obj))) return -EINVAL; - if (WARN_ON(!obj->relocs)) - return -EINVAL; + objname = klp_is_module(obj) ? obj->name : "vmlinux"; module_disable_ro(pmod); + /* For each klp relocation section */ + for (i = 1; i < pmod->klp_info->hdr.e_shnum; i++) { + sec = pmod->klp_info->sechdrs + i; + secname = pmod->klp_info->secstrings + sec->sh_name; + if (!(sec->sh_flags & SHF_RELA_LIVEPATCH)) + continue; - for (reloc = obj->relocs; reloc->name; reloc++) { - /* discover the address of the referenced symbol */ - if (reloc->external) { - if (reloc->sympos > 0) { - pr_err("non-zero sympos for external reloc symbol '%s' is not supported\n", - reloc->name); - ret = -EINVAL; - goto out; - } - ret = klp_find_external_symbol(pmod, reloc->name, &val); - } else - ret = klp_find_object_symbol(obj->name, - reloc->name, - reloc->sympos, - &val); + /* + * Format: .klp.rela.sec_objname.section_name + * See comment in klp_resolve_symbols() for an explanation + * of the selected field width value. + */ + cnt = sscanf(secname, ".klp.rela.%55[^.]", sec_objname); + if (cnt != 1) { + pr_err("section %s has an incorrectly formatted name", + secname); + ret = -EINVAL; + break; + } + + if (strcmp(objname, sec_objname)) + continue; + + ret = klp_resolve_symbols(sec, pmod); if (ret) - goto out; + break; - ret = klp_write_module_reloc(pmod, reloc->type, reloc->loc, - val + reloc->addend); - if (ret) { - pr_err("relocation failed for symbol '%s' at 0x%016lx (%d)\n", - reloc->name, val, ret); - goto out; - } + ret = apply_relocate_add(pmod->klp_info->sechdrs, + pmod->core_kallsyms.strtab, + pmod->klp_info->symndx, i, pmod); + if (ret) + break; } -out: module_enable_ro(pmod); return ret; } @@ -703,11 +739,9 @@ static int klp_init_object_loaded(struct klp_patch *patch, struct klp_func *func; int ret; - if (obj->relocs) { - ret = klp_write_object_relocations(patch->mod, obj); - if (ret) - return ret; - } + ret = klp_write_object_relocations(patch->mod, obj); + if (ret) + return ret; klp_for_each_func(obj, func) { ret = klp_find_object_symbol(obj->name, func->old_name, @@ -842,6 +876,12 @@ int klp_register_patch(struct klp_patch *patch) { int ret; + if (!is_livepatch_module(patch->mod)) { + pr_err("module %s is not marked as a livepatch module", + patch->mod->name); + return -EINVAL; + } + if (!klp_initialized()) return -ENODEV; diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index fb8c8614e728..e34f871e69b1 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -89,3 +89,4 @@ static void livepatch_exit(void) module_init(livepatch_init); module_exit(livepatch_exit); MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); -- cgit v1.2.3 From 0bed612be638e41456cd8cb270a2b411a5b43d63 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 2 Apr 2016 01:08:43 +0200 Subject: cpufreq: sched: Helpers to add and remove update_util hooks Replace the single helper for adding and removing cpufreq utilization update hooks, cpufreq_set_update_util_data(), with a pair of helpers, cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook(), and modify the users of cpufreq_set_update_util_data() accordingly. With the new helpers, the code using them doesn't need to worry about the internals of struct update_util_data and in particular it doesn't need to worry about populating the func field in it properly upfront. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) --- drivers/cpufreq/cpufreq_governor.c | 76 +++++++++++++++++++------------------- drivers/cpufreq/intel_pstate.c | 7 ++-- include/linux/sched.h | 5 ++- kernel/sched/cpufreq.c | 48 ++++++++++++++++++------ 4 files changed, 82 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 10a5cfeae8c5..3a0312f46027 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -258,43 +258,6 @@ unsigned int dbs_update(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(dbs_update); -static void gov_set_update_util(struct policy_dbs_info *policy_dbs, - unsigned int delay_us) -{ - struct cpufreq_policy *policy = policy_dbs->policy; - int cpu; - - gov_update_sample_delay(policy_dbs, delay_us); - policy_dbs->last_sample_time = 0; - - for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); - - cpufreq_set_update_util_data(cpu, &cdbs->update_util); - } -} - -static inline void gov_clear_update_util(struct cpufreq_policy *policy) -{ - int i; - - for_each_cpu(i, policy->cpus) - cpufreq_set_update_util_data(i, NULL); - - synchronize_sched(); -} - -static void gov_cancel_work(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - - gov_clear_update_util(policy_dbs->policy); - irq_work_sync(&policy_dbs->irq_work); - cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->work_count, 0); - policy_dbs->work_in_progress = false; -} - static void dbs_work_handler(struct work_struct *work) { struct policy_dbs_info *policy_dbs; @@ -382,6 +345,44 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, irq_work_queue(&policy_dbs->irq_work); } +static void gov_set_update_util(struct policy_dbs_info *policy_dbs, + unsigned int delay_us) +{ + struct cpufreq_policy *policy = policy_dbs->policy; + int cpu; + + gov_update_sample_delay(policy_dbs, delay_us); + policy_dbs->last_sample_time = 0; + + for_each_cpu(cpu, policy->cpus) { + struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); + + cpufreq_add_update_util_hook(cpu, &cdbs->update_util, + dbs_update_util_handler); + } +} + +static inline void gov_clear_update_util(struct cpufreq_policy *policy) +{ + int i; + + for_each_cpu(i, policy->cpus) + cpufreq_remove_update_util_hook(i); + + synchronize_sched(); +} + +static void gov_cancel_work(struct cpufreq_policy *policy) +{ + struct policy_dbs_info *policy_dbs = policy->governor_data; + + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; +} + static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -404,7 +405,6 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->policy_dbs = policy_dbs; - j_cdbs->update_util.func = dbs_update_util_handler; } return policy_dbs; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ae159631f52..a38219527637 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1107,8 +1107,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_busy_pid_reset(cpu); - cpu->update_util.func = intel_pstate_update_util; - pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); return 0; @@ -1132,12 +1130,13 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; - cpufreq_set_update_util_data(cpu_num, &cpu->update_util); + cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, + intel_pstate_update_util); } static void intel_pstate_clear_update_util_hook(unsigned int cpu) { - cpufreq_set_update_util_data(cpu, NULL); + cpufreq_remove_update_util_hook(cpu); synchronize_sched(); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..1b8825922597 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3240,7 +3240,10 @@ struct update_util_data { u64 time, unsigned long util, unsigned long max); }; -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max)); +void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ #endif diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 928c4ba32f68..1141954e73b4 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -14,24 +14,50 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** - * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer. * @cpu: The CPU to set the pointer for. * @data: New pointer value. + * @func: Callback function to set for the CPU. * - * Set and publish the update_util_data pointer for the given CPU. That pointer - * points to a struct update_util_data object containing a callback function - * to call from cpufreq_update_util(). That function will be called from an RCU - * read-side critical section, so it must not sleep. + * Set and publish the update_util_data pointer for the given CPU. * - * Callers must use RCU-sched callbacks to free any memory that might be - * accessed via the old update_util_data pointer or invoke synchronize_sched() - * right after this function to avoid use-after-free. + * The update_util_data pointer of @cpu is set to @data and the callback + * function pointer in the target struct update_util_data is set to @func. + * That function will be called by cpufreq_update_util() from RCU-sched + * read-side critical sections, so it must not sleep. @data will always be + * passed to it as the first argument which allows the function to get to the + * target update_util_data structure and its container. + * + * The update_util_data pointer of @cpu must be NULL when this function is + * called or it will WARN() and return with no effect. */ -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max)) { - if (WARN_ON(data && !data->func)) + if (WARN_ON(!data || !func)) return; + if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu))) + return; + + data->func = func; rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); } -EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); +EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook); + +/** + * cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer. + * @cpu: The CPU to clear the pointer for. + * + * Clear the update_util_data pointer for the given CPU. + * + * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. + */ +void cpufreq_remove_update_util_hook(int cpu) +{ + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL); +} +EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook); -- cgit v1.2.3 From 66893b6ac9952ec9d9409e9d85eaacf37bba8d15 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Mar 2016 02:50:45 +0100 Subject: cpufreq: Move governor attribute set headers to cpufreq.h Move definitions and function headers related to struct gov_attr_set to include/linux/cpufreq.h so they can be used by (future) goverernors located outside of drivers/cpufreq/. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.h | 21 --------------------- include/linux/cpufreq.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ccdd0814efbc..181a43058ece 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -41,19 +41,6 @@ /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; -struct gov_attr_set { - struct kobject kobj; - struct list_head policy_list; - struct mutex update_lock; - int usage_count; -}; - -extern const struct sysfs_ops governor_sysfs_ops; - -void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); -void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); -unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); - /* * Abbreviations: * dbs: used as a shortform for demand based switching It helps to keep variable @@ -80,14 +67,6 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set) return container_of(attr_set, struct dbs_data, attr_set); } -/* Governor's specific attributes */ -struct governor_attr { - struct attribute attr; - ssize_t (*show)(struct gov_attr_set *attr_set, char *buf); - ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, - size_t count); -}; - #define gov_show_one(_gov, file_name) \ static ssize_t show_##file_name \ (struct gov_attr_set *attr_set, char *buf) \ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 718e8725de8a..74f5cfff2b52 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -462,6 +462,29 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor); struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +/* Governor attribute set */ +struct gov_attr_set { + struct kobject kobj; + struct list_head policy_list; + struct mutex update_lock; + int usage_count; +}; + +/* sysfs ops for cpufreq governors */ +extern const struct sysfs_ops governor_sysfs_ops; + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); + +/* Governor sysfs attribute */ +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct gov_attr_set *attr_set, char *buf); + ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, + size_t count); +}; + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ -- cgit v1.2.3 From 379480d8258056bfdbaa65e4d3f024bb5b34b52b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Mar 2016 02:51:56 +0100 Subject: cpufreq: Move governor symbols to cpufreq.h Move definitions of symbols related to transition latency and sampling rate to include/linux/cpufreq.h so they can be used by (future) goverernors located outside of drivers/cpufreq/. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.h | 14 -------------- include/linux/cpufreq.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 181a43058ece..3e0eb7c54903 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -24,20 +24,6 @@ #include #include -/* - * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. The - * governor will work on any processor with transition latency <= 10ms, using - * appropriate sampling rate. - * - * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) - * this governor will not work. All times here are in us (micro seconds). - */ -#define MIN_SAMPLING_RATE_RATIO (2) -#define LATENCY_MULTIPLIER (1000) -#define MIN_LATENCY_MULTIPLIER (20) -#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) - /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 74f5cfff2b52..2b4f248b8ef1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -426,6 +426,20 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) +/* + * The polling frequency depends on the capability of the processor. Default + * polling frequency is 1000 times the transition latency of the processor. The + * ondemand governor will work on any processor with transition latency <= 10ms, + * using appropriate sampling rate. + * + * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) + * the ondemand governor will not work. All times here are in us (microseconds). + */ +#define MIN_SAMPLING_RATE_RATIO (2) +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (20) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + /* Governor Events */ #define CPUFREQ_GOV_START 1 #define CPUFREQ_GOV_STOP 2 -- cgit v1.2.3 From b7898fda5bc7e786e76ce24fbd2ec993b08ec518 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Mar 2016 03:47:49 +0200 Subject: cpufreq: Support for fast frequency switching Modify the ACPI cpufreq driver to provide a method for switching CPU frequencies from interrupt context and update the cpufreq core to support that method if available. Introduce a new cpufreq driver callback, ->fast_switch, to be invoked for frequency switching from interrupt context by (future) governors supporting that feature via (new) helper function cpufreq_driver_fast_switch(). Add two new policy flags, fast_switch_possible, to be set by the cpufreq driver if fast frequency switching can be used for the given policy and fast_switch_enabled, to be set by the governor if it is going to use fast frequency switching for the given policy. Also add a helper for setting the latter. Since fast frequency switching is inherently incompatible with cpufreq transition notifiers, make it possible to set the fast_switch_enabled only if there are no transition notifiers already registered and make the registration of new transition notifiers fail if fast_switch_enabled is set for at least one policy. Implement the ->fast_switch callback in the ACPI cpufreq driver and make it set fast_switch_possible during policy initialization as appropriate. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 42 +++++++++++++ drivers/cpufreq/cpufreq.c | 130 +++++++++++++++++++++++++++++++++++++++-- include/linux/cpufreq.h | 16 +++++ 3 files changed, 183 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index fb5712141040..7f38fb55f223 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -458,6 +458,43 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return result; } +unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct acpi_cpufreq_data *data = policy->driver_data; + struct acpi_processor_performance *perf; + struct cpufreq_frequency_table *entry; + unsigned int next_perf_state, next_freq, freq; + + /* + * Find the closest frequency above target_freq. + * + * The table is sorted in the reverse order with respect to the + * frequency and all of the entries are valid (see the initialization). + */ + entry = data->freq_table; + do { + entry++; + freq = entry->frequency; + } while (freq >= target_freq && freq != CPUFREQ_TABLE_END); + entry--; + next_freq = entry->frequency; + next_perf_state = entry->driver_data; + + perf = to_perf_data(data); + if (perf->state == next_perf_state) { + if (unlikely(data->resume)) + data->resume = 0; + else + return next_freq; + } + + data->cpu_freq_write(&perf->control_register, + perf->states[next_perf_state].control); + perf->state = next_perf_state; + return next_freq; +} + static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { @@ -821,6 +858,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) */ data->resume = 1; + policy->fast_switch_possible = !acpi_pstate_strict && + !(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY); + return result; err_freqfree: @@ -843,6 +883,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { + policy->fast_switch_possible = false; policy->driver_data = NULL; acpi_processor_unregister_performance(data->acpi_perf_cpu); free_cpumask_var(data->freqdomain_cpus); @@ -876,6 +917,7 @@ static struct freq_attr *acpi_cpufreq_attr[] = { static struct cpufreq_driver acpi_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = acpi_cpufreq_target, + .fast_switch = acpi_cpufreq_fast_switch, .bios_limit = acpi_processor_get_bios_limit, .init = acpi_cpufreq_cpu_init, .exit = acpi_cpufreq_cpu_exit, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b87596b591b3..a5b7d77d4816 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -77,6 +77,7 @@ static inline bool has_target(void) static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); +static int cpufreq_exit_governor(struct cpufreq_policy *policy); /** * Two notifier lists: the "policy" list is involved in the @@ -429,6 +430,68 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end); +/* + * Fast frequency switching status count. Positive means "enabled", negative + * means "disabled" and 0 means "not decided yet". + */ +static int cpufreq_fast_switch_count; +static DEFINE_MUTEX(cpufreq_fast_switch_lock); + +static void cpufreq_list_transition_notifiers(void) +{ + struct notifier_block *nb; + + pr_info("Registered transition notifiers:\n"); + + mutex_lock(&cpufreq_transition_notifier_list.mutex); + + for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next) + pr_info("%pF\n", nb->notifier_call); + + mutex_unlock(&cpufreq_transition_notifier_list.mutex); +} + +/** + * cpufreq_enable_fast_switch - Enable fast frequency switching for policy. + * @policy: cpufreq policy to enable fast frequency switching for. + * + * Try to enable fast frequency switching for @policy. + * + * The attempt will fail if there is at least one transition notifier registered + * at this point, as fast frequency switching is quite fundamentally at odds + * with transition notifiers. Thus if successful, it will make registration of + * transition notifiers fail going forward. + */ +void cpufreq_enable_fast_switch(struct cpufreq_policy *policy) +{ + lockdep_assert_held(&policy->rwsem); + + if (!policy->fast_switch_possible) + return; + + mutex_lock(&cpufreq_fast_switch_lock); + if (cpufreq_fast_switch_count >= 0) { + cpufreq_fast_switch_count++; + policy->fast_switch_enabled = true; + } else { + pr_warn("CPU%u: Fast frequency switching not enabled\n", + policy->cpu); + cpufreq_list_transition_notifiers(); + } + mutex_unlock(&cpufreq_fast_switch_lock); +} +EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch); + +static void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) +{ + mutex_lock(&cpufreq_fast_switch_lock); + if (policy->fast_switch_enabled) { + policy->fast_switch_enabled = false; + if (!WARN_ON(cpufreq_fast_switch_count <= 0)) + cpufreq_fast_switch_count--; + } + mutex_unlock(&cpufreq_fast_switch_lock); +} /********************************************************************* * SYSFS INTERFACE * @@ -1319,7 +1382,7 @@ static void cpufreq_offline(unsigned int cpu) /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_exit_governor(policy); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1447,8 +1510,12 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) ret_freq = cpufreq_driver->get(policy->cpu); - /* Updating inactive policies is invalid, so avoid doing that. */ - if (unlikely(policy_is_inactive(policy))) + /* + * Updating inactive policies is invalid, so avoid doing that. Also + * if fast frequency switching is used with the given policy, the check + * against policy->cur is pointless, so skip it in that case too. + */ + if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled) return ret_freq; if (ret_freq && policy->cur && @@ -1672,8 +1739,18 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: + mutex_lock(&cpufreq_fast_switch_lock); + + if (cpufreq_fast_switch_count > 0) { + mutex_unlock(&cpufreq_fast_switch_lock); + return -EBUSY; + } ret = srcu_notifier_chain_register( &cpufreq_transition_notifier_list, nb); + if (!ret) + cpufreq_fast_switch_count--; + + mutex_unlock(&cpufreq_fast_switch_lock); break; case CPUFREQ_POLICY_NOTIFIER: ret = blocking_notifier_chain_register( @@ -1706,8 +1783,14 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: + mutex_lock(&cpufreq_fast_switch_lock); + ret = srcu_notifier_chain_unregister( &cpufreq_transition_notifier_list, nb); + if (!ret && !WARN_ON(cpufreq_fast_switch_count >= 0)) + cpufreq_fast_switch_count++; + + mutex_unlock(&cpufreq_fast_switch_lock); break; case CPUFREQ_POLICY_NOTIFIER: ret = blocking_notifier_chain_unregister( @@ -1726,6 +1809,37 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); * GOVERNORS * *********************************************************************/ +/** + * cpufreq_driver_fast_switch - Carry out a fast CPU frequency switch. + * @policy: cpufreq policy to switch the frequency for. + * @target_freq: New frequency to set (may be approximate). + * + * Carry out a fast frequency switch without sleeping. + * + * The driver's ->fast_switch() callback invoked by this function must be + * suitable for being called from within RCU-sched read-side critical sections + * and it is expected to select the minimum available frequency greater than or + * equal to @target_freq (CPUFREQ_RELATION_L). + * + * This function must not be called if policy->fast_switch_enabled is unset. + * + * Governors calling this function must guarantee that it will never be invoked + * twice in parallel for the same policy and that it will never be called in + * parallel with either ->target() or ->target_index() for the same policy. + * + * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch() + * callback to indicate an error condition, the hardware configuration must be + * preserved. + */ +unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + clamp_val(target_freq, policy->min, policy->max); + + return cpufreq_driver->fast_switch(policy, target_freq); +} +EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch); + /* Must set freqs->new to intermediate frequency */ static int __target_intermediate(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, int index) @@ -1946,6 +2060,12 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); } +static int cpufreq_exit_governor(struct cpufreq_policy *policy) +{ + cpufreq_disable_fast_switch(policy); + return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); +} + int cpufreq_register_governor(struct cpufreq_governor *governor) { int err; @@ -2101,7 +2221,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_exit_governor(policy); if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2118,7 +2238,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("cpufreq: governor change\n"); return 0; } - cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + cpufreq_exit_governor(policy); } /* new governor failed, so re-start old one */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2b4f248b8ef1..55e69ebb035c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -102,6 +102,17 @@ struct cpufreq_policy { */ struct rw_semaphore rwsem; + /* + * Fast switch flags: + * - fast_switch_possible should be set by the driver if it can + * guarantee that frequency can be changed on any CPU sharing the + * policy and that the change will affect all of the policy CPUs then. + * - fast_switch_enabled is to be set by governors that support fast + * freqnency switching with the help of cpufreq_enable_fast_switch(). + */ + bool fast_switch_possible; + bool fast_switch_enabled; + /* Synchronization for frequency transitions */ bool transition_ongoing; /* Tracks transition status */ spinlock_t transition_lock; @@ -156,6 +167,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); +void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); #else static inline unsigned int cpufreq_get(unsigned int cpu) { @@ -236,6 +248,8 @@ struct cpufreq_driver { unsigned int relation); /* Deprecated */ int (*target_index)(struct cpufreq_policy *policy, unsigned int index); + unsigned int (*fast_switch)(struct cpufreq_policy *policy, + unsigned int target_freq); /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. @@ -464,6 +478,8 @@ struct cpufreq_governor { }; /* Pass a target to the cpufreq driver */ +unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq); int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); -- cgit v1.2.3 From ee2ae1ed46251dcbdcc2c59b5e30f664ddfbacb1 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Fri, 1 Apr 2016 11:37:33 +0200 Subject: stmmac: add new DT platform entries for GMAC4 This is to support the snps,dwmac-4.00 and snps,dwmac-4.10a and related features on the platform driver. See binding doc for further details. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/stmmac.txt | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 7 +++++++ include/linux/stmmac.h | 2 ++ 3 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index 6605d19601c2..4d302db657c0 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -59,6 +59,8 @@ Optional properties: - snps,fb: fixed-burst - snps,mb: mixed-burst - snps,rb: rebuild INCRx Burst + - snps,tso: this enables the TSO feature otherwise it will be managed by + MAC HW capability register. - mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus. Examples: diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index cf37ea558ecc..effaa4ff5ab7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -284,6 +284,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->pmt = 1; } + if (of_device_is_compatible(np, "snps,dwmac-4.00") || + of_device_is_compatible(np, "snps,dwmac-4.10a")) { + plat->has_gmac4 = 1; + plat->pmt = 1; + plat->tso_en = of_property_read_bool(np, "snps,tso"); + } + if (of_device_is_compatible(np, "snps,dwmac-3.610") || of_device_is_compatible(np, "snps,dwmac-3.710")) { plat->enh_desc = 1; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e6bc30a42a74..ffdaca9c01af 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -137,5 +137,7 @@ struct plat_stmmacenet_data { void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; + int has_gmac4; + bool tso_en; }; #endif -- cgit v1.2.3 From 8cb359e3a1f6318f971bec281623613f48b711be Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 23 Mar 2016 12:34:41 +0000 Subject: iio: buffer: add missing descriptions in iio_buffer_access_funcs The members buffer_group and attrs of iio_buffer_access_funcs have no descriptions for the documentation. Adding them. Fixes: 08e7e0adaa17 ("iio: buffer: Allocate standard attributes in the core") Signed-off-by: Luis de Bethencourt Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 2ec3ad58e8a0..70a5164f4728 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -83,10 +83,12 @@ struct iio_buffer_access_funcs { * @access: [DRIVER] buffer access functions associated with the * implementation. * @scan_el_dev_attr_list:[INTERN] list of scan element related attributes. + * @buffer_group: [INTERN] attributes of the buffer group * @scan_el_group: [DRIVER] attribute group for those attributes not * created from the iio_chan_info array. * @pollq: [INTERN] wait queue to allow for polling on the buffer. * @stufftoread: [INTERN] flag to indicate new data. + * @attrs: [INTERN] standard attributes of the buffer * @demux_list: [INTERN] list of operations required to demux the scan. * @demux_bounce: [INTERN] buffer for doing gather from incoming scan. * @buffer_list: [INTERN] entry in the devices list of current buffers. -- cgit v1.2.3 From fddcca5107051adf9e4481d2a79ae0616577fd2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Feb 2016 13:20:28 +0100 Subject: mtd: avoid stack overflow in MTD CFI code When map_word gets too large, we use a lot of kernel stack, and for MTD_MAP_BANK_WIDTH_32, this means we use more than the recommended 1024 bytes in a number of functions: drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_write_buffers': drivers/mtd/chips/cfi_cmdset_0020.c:651:1: warning: the frame size of 1336 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_erase_varsize': drivers/mtd/chips/cfi_cmdset_0020.c:972:1: warning: the frame size of 1208 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer': drivers/mtd/chips/cfi_cmdset_0001.c:1835:1: warning: the frame size of 1240 bytes is larger than 1024 bytes [-Wframe-larger-than=] This can be avoided if all operations on the map word are done indirectly and the stack gets reused between the calls. We can mostly achieve this by selecting MTD_COMPLEX_MAPPINGS whenever MTD_MAP_BANK_WIDTH_32 is set, but for the case that no other bank width is enabled, we also need to use a non-constant map_bankwidth() to convince the compiler to use less stack. Signed-off-by: Arnd Bergmann [Brian: this patch mostly achieves its goal by forcing MTD_COMPLEX_MAPPINGS (and the accompanying indirection) for 256-bit mappings; the rest of the change is mostly a wash, though it helps reduce stack size slightly. If we really care about supporting 256-bit mappings though, we should consider rewriting some of this code to avoid keeping and assigning so many 256-bit objects on the stack.] Signed-off-by: Brian Norris --- drivers/mtd/chips/Kconfig | 1 + include/linux/mtd/map.h | 19 +++++++------------ 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index 3b3dabce58de..bbfa1f129266 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -115,6 +115,7 @@ config MTD_MAP_BANK_WIDTH_16 config MTD_MAP_BANK_WIDTH_32 bool "Support 256-bit buswidth" if MTD_CFI_GEOMETRY + select MTD_COMPLEX_MAPPINGS if HAS_IOMEM default n help If you wish to support CFI devices on a physical bus which is diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 5e0eb7ccabd4..3aa56e3104bb 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -122,18 +122,13 @@ #endif #ifdef CONFIG_MTD_MAP_BANK_WIDTH_32 -# ifdef map_bankwidth -# undef map_bankwidth -# define map_bankwidth(map) ((map)->bankwidth) -# undef map_bankwidth_is_large -# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) -# undef map_words -# define map_words(map) map_calc_words(map) -# else -# define map_bankwidth(map) 32 -# define map_bankwidth_is_large(map) (1) -# define map_words(map) map_calc_words(map) -# endif +/* always use indirect access for 256-bit to preserve kernel stack */ +# undef map_bankwidth +# define map_bankwidth(map) ((map)->bankwidth) +# undef map_bankwidth_is_large +# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) +# undef map_words +# define map_words(map) map_calc_words(map) #define map_bankwidth_is_32(map) (map_bankwidth(map) == 32) #undef MAX_MAP_BANKWIDTH #define MAX_MAP_BANKWIDTH 32 -- cgit v1.2.3 From 5651d6aaf489d1db48c253cf884b40214e91c2c5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 26 Feb 2016 11:50:28 +0100 Subject: mtd: bcm47xxsflash: use ioremap_cache() instead of KSEG0ADDR() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using KSEG0ADDR makes code highly MIPS dependent and not portable. Thanks to the fix a68f376 ("MIPS: io.h: Define `ioremap_cache'") we can use ioremap_cache which is generic and supported on MIPS as well now. KSEG0ADDR was translating 0x1c000000 into 0x9c000000. With ioremap_cache we use MIPS's __ioremap (and then remap_area_pages). This results in different address (e.g. 0xc0080000) but it still should be cached as expected and it was successfully tested with BCM47186B0. Other than that drivers/bcma/driver_chipcommon_sflash.c nicely setups a struct resource for access window, but we wren't using it. Use it now and drop duplicated info. Signed-off-by: Brian Norris Signed-off-by: Rafał Miłecki --- drivers/bcma/driver_chipcommon_sflash.c | 1 - drivers/mtd/devices/bcm47xxsflash.c | 29 ++++++++++++++++++++++++----- drivers/mtd/devices/bcm47xxsflash.h | 3 ++- include/linux/bcma/bcma_driver_chipcommon.h | 1 - 4 files changed, 26 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon_sflash.c b/drivers/bcma/driver_chipcommon_sflash.c index 04d706ca5f43..35b13a08ca3e 100644 --- a/drivers/bcma/driver_chipcommon_sflash.c +++ b/drivers/bcma/driver_chipcommon_sflash.c @@ -146,7 +146,6 @@ int bcma_sflash_init(struct bcma_drv_cc *cc) return -ENOTSUPP; } - sflash->window = BCMA_SOC_FLASH2; sflash->blocksize = e->blocksize; sflash->numblocks = e->numblocks; sflash->size = sflash->blocksize * sflash->numblocks; diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c index 347bb83db864..1c65c15b31a1 100644 --- a/drivers/mtd/devices/bcm47xxsflash.c +++ b/drivers/mtd/devices/bcm47xxsflash.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -109,8 +110,7 @@ static int bcm47xxsflash_read(struct mtd_info *mtd, loff_t from, size_t len, if ((from + len) > mtd->size) return -EINVAL; - memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(b47s->window + from), - len); + memcpy_fromio(buf, b47s->window + from, len); *retlen = len; return len; @@ -275,15 +275,33 @@ static void bcm47xxsflash_bcma_cc_write(struct bcm47xxsflash *b47s, u16 offset, static int bcm47xxsflash_bcma_probe(struct platform_device *pdev) { - struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev); + struct device *dev = &pdev->dev; + struct bcma_sflash *sflash = dev_get_platdata(dev); struct bcm47xxsflash *b47s; + struct resource *res; int err; - b47s = devm_kzalloc(&pdev->dev, sizeof(*b47s), GFP_KERNEL); + b47s = devm_kzalloc(dev, sizeof(*b47s), GFP_KERNEL); if (!b47s) return -ENOMEM; sflash->priv = b47s; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "invalid resource\n"); + return -EINVAL; + } + if (!devm_request_mem_region(dev, res->start, resource_size(res), + res->name)) { + dev_err(dev, "can't request region for resource %pR\n", res); + return -EBUSY; + } + b47s->window = ioremap_cache(res->start, resource_size(res)); + if (!b47s->window) { + dev_err(dev, "ioremap failed for resource %pR\n", res); + return -ENOMEM; + } + b47s->bcma_cc = container_of(sflash, struct bcma_drv_cc, sflash); b47s->cc_read = bcm47xxsflash_bcma_cc_read; b47s->cc_write = bcm47xxsflash_bcma_cc_write; @@ -297,7 +315,6 @@ static int bcm47xxsflash_bcma_probe(struct platform_device *pdev) break; } - b47s->window = sflash->window; b47s->blocksize = sflash->blocksize; b47s->numblocks = sflash->numblocks; b47s->size = sflash->size; @@ -306,6 +323,7 @@ static int bcm47xxsflash_bcma_probe(struct platform_device *pdev) err = mtd_device_parse_register(&b47s->mtd, probes, NULL, NULL, 0); if (err) { pr_err("Failed to register MTD device: %d\n", err); + iounmap(b47s->window); return err; } @@ -321,6 +339,7 @@ static int bcm47xxsflash_bcma_remove(struct platform_device *pdev) struct bcm47xxsflash *b47s = sflash->priv; mtd_device_unregister(&b47s->mtd); + iounmap(b47s->window); return 0; } diff --git a/drivers/mtd/devices/bcm47xxsflash.h b/drivers/mtd/devices/bcm47xxsflash.h index fe93daf4f489..1564b62b412e 100644 --- a/drivers/mtd/devices/bcm47xxsflash.h +++ b/drivers/mtd/devices/bcm47xxsflash.h @@ -65,7 +65,8 @@ struct bcm47xxsflash { enum bcm47xxsflash_type type; - u32 window; + void __iomem *window; + u32 blocksize; u16 numblocks; u32 size; diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 846513c73606..a5ac2cad5cb7 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -587,7 +587,6 @@ struct mtd_info; struct bcma_sflash { bool present; - u32 window; u32 blocksize; u16 numblocks; u32 size; -- cgit v1.2.3 From 5b01e4b9efa0b78672cbbea830c9fbcc7f239e29 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Apr 2016 11:43:54 +0200 Subject: libata: Implement NCQ autosense Some newer devices support NCQ autosense (cf ACS-4), so we should be using it to retrieve the sense code and speed up recovery. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- drivers/ata/libata-eh.c | 12 ++++++++++++ drivers/ata/libata-scsi.c | 7 ++++++- drivers/ata/libata.h | 1 + include/linux/ata.h | 2 ++ 4 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 961acc788f44..8c8355f0792e 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1600,6 +1600,8 @@ static int ata_eh_read_log_10h(struct ata_device *dev, tf->hob_lbah = buf[10]; tf->nsect = buf[12]; tf->hob_nsect = buf[13]; + if (ata_id_has_ncq_autosense(dev->id)) + tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; return 0; } @@ -1797,6 +1799,16 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) memcpy(&qc->result_tf, &tf, sizeof(tf)); qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; + if (qc->result_tf.auxiliary) { + char sense_key, asc, ascq; + + sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; + asc = (qc->result_tf.auxiliary >> 8) & 0xff; + ascq = qc->result_tf.auxiliary & 0xff; + ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); + qc->flags |= ATA_QCFLAG_SENSE_VALID; + } + ehc->i.err_mask &= ~AC_ERR_DEV; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 567859ce0512..6dc2fadfd7c5 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -270,8 +270,11 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); -static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { + if (!cmd) + return; + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); @@ -1784,6 +1787,8 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) && ((cdb[2] & 0x20) || need_sense)) ata_gen_passthru_sense(qc); + else if (qc->flags & ATA_QCFLAG_SENSE_VALID) + cmd->result = SAM_STAT_CHECK_CONDITION; else if (need_sense) ata_gen_ata_sense(qc); else diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index f840ca18a7c0..8cfdd9616d16 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -137,6 +137,7 @@ extern int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht); extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); +extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); diff --git a/include/linux/ata.h b/include/linux/ata.h index c1a2f345cbe6..e797e1b53006 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -528,6 +528,8 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) +#define ata_id_has_ncq_autosense(id) \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { -- cgit v1.2.3 From e87fd28cf9a2d9018ac4b6dd92f0b417714bc18d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Apr 2016 11:43:55 +0200 Subject: libata: Implement support for sense data reporting ACS-4 defines a sense data reporting feature set. This patch implements support for it. tj: Cosmetic formatting updates. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 20 +++++++++++++- drivers/ata/libata-eh.c | 68 ++++++++++++++++++++++++++++++++++++++++++++--- include/linux/ata.h | 16 +++++++++++ 3 files changed, 99 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 55e257c268dd..f991f786227e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2148,6 +2148,24 @@ static int ata_dev_config_ncq(struct ata_device *dev, return 0; } +static void ata_dev_config_sense_reporting(struct ata_device *dev) +{ + unsigned int err_mask; + + if (!ata_id_has_sense_reporting(dev->id)) + return; + + if (ata_id_sense_reporting_enabled(dev->id)) + return; + + err_mask = ata_dev_set_feature(dev, SETFEATURE_SENSE_DATA, 0x1); + if (err_mask) { + ata_dev_dbg(dev, + "failed to enable Sense Data Reporting, Emask 0x%x\n", + err_mask); + } +} + /** * ata_dev_configure - Configure the specified ATA/ATAPI device * @dev: Target device to configure @@ -2370,7 +2388,7 @@ int ata_dev_configure(struct ata_device *dev) dev->devslp_timing[i] = sata_setting[j]; } } - + ata_dev_config_sense_reporting(dev); dev->cdb_len = 16; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 8c8355f0792e..170e891e79af 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1637,6 +1637,56 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) return err_mask; } +/** + * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT + * @dev: device to perform REQUEST_SENSE_SENSE_DATA_EXT to + * @cmd: scsi command for which the sense code should be set + * + * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK + * SENSE. This function is an EH helper. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +static void ata_eh_request_sense(struct ata_queued_cmd *qc, + struct scsi_cmnd *cmd) +{ + struct ata_device *dev = qc->dev; + struct ata_taskfile tf; + unsigned int err_mask; + + if (qc->ap->pflags & ATA_PFLAG_FROZEN) { + ata_dev_warn(dev, "sense data available but port frozen\n"); + return; + } + + if (!cmd) + return; + + if (!ata_id_sense_reporting_enabled(dev->id)) { + ata_dev_warn(qc->dev, "sense data reporting disabled\n"); + return; + } + + DPRINTK("ATA request sense\n"); + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; + tf.command = ATA_CMD_REQ_SENSE_DATA; + tf.protocol = ATA_PROT_NODATA; + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + /* Ignore err_mask; ATA_ERR might be set */ + if (tf.command & ATA_SENSE) { + ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal); + qc->flags |= ATA_QCFLAG_SENSE_VALID; + } else { + ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", + tf.command, err_mask); + } +} + /** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to @@ -1838,14 +1888,23 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - if (stat & (ATA_ERR | ATA_DF)) + if (stat & (ATA_ERR | ATA_DF)) { qc->err_mask |= AC_ERR_DEV; - else + /* + * Sense data reporting does not work if the + * device fault bit is set. + */ + if (stat & ATA_DF) + stat &= ~ATA_SENSE; + } else { return 0; + } switch (qc->dev->class) { case ATA_DEV_ATA: case ATA_DEV_ZAC: + if (stat & ATA_SENSE) + ata_eh_request_sense(qc, qc->scsicmd); if (err & ATA_ICRC) qc->err_mask |= AC_ERR_ATA_BUS; if (err & (ATA_UNC | ATA_AMNF)) @@ -2581,14 +2640,15 @@ static void ata_eh_link_report(struct ata_link *link) #ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | - ATA_ERR)) { + ATA_SENSE | ATA_ERR)) { if (res->command & ATA_BUSY) ata_dev_err(qc->dev, "status: { Busy }\n"); else - ata_dev_err(qc->dev, "status: { %s%s%s%s}\n", + ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", res->command & ATA_DRDY ? "DRDY " : "", res->command & ATA_DF ? "DF " : "", res->command & ATA_DRQ ? "DRQ " : "", + res->command & ATA_SENSE ? "SENSE " : "", res->command & ATA_ERR ? "ERR " : ""); } diff --git a/include/linux/ata.h b/include/linux/ata.h index e797e1b53006..00aebc4c83ad 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -385,6 +385,8 @@ enum { SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ + SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ + /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, @@ -718,6 +720,20 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) return false; } +static inline bool ata_id_has_sense_reporting(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_3] & (1 << 6); +} + +static inline bool ata_id_sense_reporting_enabled(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_4] & (1 << 6); +} + /** * ata_id_major_version - get ATA level of drive * @id: Identify data -- cgit v1.2.3 From 06dbde5f3a44248fc02e24d662ac4849202abb48 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Apr 2016 11:44:03 +0200 Subject: libata: Implement control mode page to select sense format Implement MODE SELECT for the control mode page to allow the OS to switch to descriptor sense. tj: Dropped s/sb/cmd->sense_buffer/ in ata_gen_ata_sense(). Added @dev description to ata_msense_ctl_mode(). Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- drivers/ata/libata-eh.c | 4 +- drivers/ata/libata-scsi.c | 116 ++++++++++++++++++++++++++++++++++------------ drivers/ata/libata.h | 3 +- include/linux/libata.h | 1 + 4 files changed, 91 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index e37258b78e01..5b340ce4eeac 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1679,7 +1679,7 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc, err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); /* Ignore err_mask; ATA_ERR might be set */ if (tf.command & ATA_SENSE) { - ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal); + ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal); qc->flags |= ATA_QCFLAG_SENSE_VALID; } else { ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", @@ -1855,7 +1855,7 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; asc = (qc->result_tf.auxiliary >> 8) & 0xff; ascq = qc->result_tf.auxiliary & 0xff; - ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); + ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq); ata_scsi_set_sense_information(dev, qc->scsicmd, &qc->result_tf); qc->flags |= ATA_QCFLAG_SENSE_VALID; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0da03c019f27..2389247bdf6f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -270,14 +270,17 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); -void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd, + u8 sk, u8 asc, u8 ascq) { + bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); + if (!cmd) return; cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; - scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); + scsi_build_sense_buffer(d_sense, cmd->sense_buffer, sk, asc, ascq); } void ata_scsi_set_sense_information(struct ata_device *dev, @@ -384,9 +387,10 @@ struct device_attribute *ata_common_sdev_attrs[] = { }; EXPORT_SYMBOL_GPL(ata_common_sdev_attrs); -static void ata_scsi_invalid_field(struct scsi_cmnd *cmd) +static void ata_scsi_invalid_field(struct ata_device *dev, + struct scsi_cmnd *cmd) { - ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, 0x24, 0x0); + ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in cbd" */ cmd->scsi_done(cmd); } @@ -1014,7 +1018,7 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature, &sense_key, &asc, &ascq, verbose); - ata_scsi_set_sense(cmd, sense_key, asc, ascq); + ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { /* * ATA PASS-THROUGH INFORMATION AVAILABLE @@ -1112,12 +1116,12 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature, &sense_key, &asc, &ascq, verbose); - ata_scsi_set_sense(cmd, sense_key, asc, ascq); + ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq); } else { /* Could not decode error */ ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n", tf->command, qc->err_mask); - ata_scsi_set_sense(cmd, ABORTED_COMMAND, 0, 0); + ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); return; } @@ -1440,7 +1444,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) return 0; invalid_fld: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in cbd" */ return 1; skip: @@ -1679,12 +1683,12 @@ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc) return 0; invalid_fld: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in cbd" */ return 1; out_of_range: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x21, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x0); /* "Logical Block Address out of range" */ return 1; @@ -1781,12 +1785,12 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) goto out_of_range; /* treat all other errors as -EINVAL, fall through */ invalid_fld: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in cbd" */ return 1; out_of_range: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x21, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x0); /* "Logical Block Address out of range" */ return 1; @@ -2358,6 +2362,7 @@ static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) /** * ata_msense_ctl_mode - Simulate MODE SENSE control mode page + * @dev: ATA device of interest * @buf: output buffer * @changeable: whether changeable parameters are requested * @@ -2366,9 +2371,12 @@ static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) * LOCKING: * None. */ -static unsigned int ata_msense_ctl_mode(u8 *buf, bool changeable) +static unsigned int ata_msense_ctl_mode(struct ata_device *dev, u8 *buf, + bool changeable) { modecpy(buf, def_control_mpage, sizeof(def_control_mpage), changeable); + if (changeable && (dev->flags & ATA_DFLAG_D_SENSE)) + buf[2] |= (1 << 2); /* Descriptor sense requested */ return sizeof(def_control_mpage); } @@ -2482,13 +2490,13 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) break; case CONTROL_MPAGE: - p += ata_msense_ctl_mode(p, page_control == 1); + p += ata_msense_ctl_mode(args->dev, p, page_control == 1); break; case ALL_MPAGES: p += ata_msense_rw_recovery(p, page_control == 1); p += ata_msense_caching(args->id, p, page_control == 1); - p += ata_msense_ctl_mode(p, page_control == 1); + p += ata_msense_ctl_mode(args->dev, p, page_control == 1); break; default: /* invalid page code */ @@ -2521,12 +2529,12 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) return 0; invalid_fld: - ata_scsi_set_sense(args->cmd, ILLEGAL_REQUEST, 0x24, 0x0); + ata_scsi_set_sense(dev, args->cmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in cbd" */ return 1; saving_not_supp: - ata_scsi_set_sense(args->cmd, ILLEGAL_REQUEST, 0x39, 0x0); + ata_scsi_set_sense(dev, args->cmd, ILLEGAL_REQUEST, 0x39, 0x0); /* "Saving parameters not supported" */ return 1; } @@ -3163,7 +3171,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) return 0; invalid_fld: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x00); + ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x24, 0x00); /* "Invalid field in cdb" */ return 1; } @@ -3228,7 +3236,7 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) return 0; invalid_fld: - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x00); + ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x24, 0x00); /* "Invalid field in cdb" */ return 1; } @@ -3279,6 +3287,51 @@ static int ata_mselect_caching(struct ata_queued_cmd *qc, return 0; } +/** + * ata_mselect_control - Simulate MODE SELECT for control page + * @qc: Storage for translated ATA taskfile + * @buf: input buffer + * @len: number of valid bytes in the input buffer + * + * Prepare a taskfile to modify caching information for the device. + * + * LOCKING: + * None. + */ +static int ata_mselect_control(struct ata_queued_cmd *qc, + const u8 *buf, int len) +{ + struct ata_device *dev = qc->dev; + char mpage[CONTROL_MPAGE_LEN]; + u8 d_sense; + + /* + * The first two bytes of def_control_mpage are a header, so offsets + * in mpage are off by 2 compared to buf. Same for len. + */ + + if (len != CONTROL_MPAGE_LEN - 2) + return -EINVAL; + + d_sense = buf[0] & (1 << 2); + + /* + * Check that read-only bits are not modified. + */ + ata_msense_ctl_mode(dev, mpage, false); + mpage[2] &= ~(1 << 2); + mpage[2] |= d_sense; + if (memcmp(mpage + 2, buf, CONTROL_MPAGE_LEN - 2) != 0) + return -EINVAL; + if (d_sense & (1 << 2)) + dev->flags |= ATA_DFLAG_D_SENSE; + else + dev->flags &= ~ATA_DFLAG_D_SENSE; + qc->scsicmd->result = SAM_STAT_GOOD; + qc->scsicmd->scsi_done(qc->scsicmd); + return 0; +} + /** * ata_scsiop_mode_select - Simulate MODE SELECT 6, 10 commands * @qc: Storage for translated ATA taskfile @@ -3381,7 +3434,10 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) if (ata_mselect_caching(qc, p, pg_len) < 0) goto invalid_param; break; - + case CONTROL_MPAGE: + if (ata_mselect_control(qc, p, pg_len) < 0) + goto invalid_param; + break; default: /* invalid page code */ goto invalid_param; } @@ -3397,17 +3453,17 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) invalid_fld: /* "Invalid field in CDB" */ - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x24, 0x0); return 1; invalid_param: /* "Invalid field in parameter list" */ - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x26, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x26, 0x0); return 1; invalid_param_len: /* "Parameter list length error" */ - ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x1a, 0x0); + ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); return 1; skip: @@ -3611,12 +3667,12 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) switch(scsicmd[0]) { /* TODO: worth improving? */ case FORMAT_UNIT: - ata_scsi_invalid_field(cmd); + ata_scsi_invalid_field(dev, cmd); break; case INQUIRY: if (scsicmd[1] & 2) /* is CmdDt set? */ - ata_scsi_invalid_field(cmd); + ata_scsi_invalid_field(dev, cmd); else if ((scsicmd[1] & 1) == 0) /* is EVPD clear? */ ata_scsi_rbuf_fill(&args, ata_scsiop_inq_std); else switch (scsicmd[2]) { @@ -3642,7 +3698,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2); break; default: - ata_scsi_invalid_field(cmd); + ata_scsi_invalid_field(dev, cmd); break; } break; @@ -3660,7 +3716,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) if ((scsicmd[1] & 0x1f) == SAI_READ_CAPACITY_16) ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap); else - ata_scsi_invalid_field(cmd); + ata_scsi_invalid_field(dev, cmd); break; case REPORT_LUNS: @@ -3668,7 +3724,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) break; case REQUEST_SENSE: - ata_scsi_set_sense(cmd, 0, 0, 0); + ata_scsi_set_sense(dev, cmd, 0, 0, 0); cmd->result = (DRIVER_SENSE << 24); cmd->scsi_done(cmd); break; @@ -3692,12 +3748,12 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) if ((tmp8 == 0x4) && (!scsicmd[3]) && (!scsicmd[4])) ata_scsi_rbuf_fill(&args, ata_scsiop_noop); else - ata_scsi_invalid_field(cmd); + ata_scsi_invalid_field(dev, cmd); break; /* all other commands */ default: - ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, 0x20, 0x0); + ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x20, 0x0); /* "Invalid command operation code" */ cmd->scsi_done(cmd); break; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index dbc67604b3c5..3b301a48007c 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -138,7 +138,8 @@ extern int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht); extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); -extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); +extern void ata_scsi_set_sense(struct ata_device *dev, + struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); extern void ata_scsi_set_sense_information(struct ata_device *dev, struct scsi_cmnd *cmd, const struct ata_taskfile *tf); diff --git a/include/linux/libata.h b/include/linux/libata.h index 2c4ebef79d0c..a418bca0df0d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -180,6 +180,7 @@ enum { ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ + ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ -- cgit v1.2.3 From 4113652252fad972e0c191b1e536dc74a6faebdc Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 1 Apr 2016 21:38:16 +0200 Subject: reset: Add missing function stub for device_reset The Mediatek's thermal driver fails to compile when the RESET_CONTROLLER option is not set. Logically, as the driver depends on this option to compile, the Kconfig should select it but actually that is not correct because the Kconfig provides also the COMPILE_TEST to increase the compile test coverage. By providing the missing 'device_reset' stub for the driver in reset.h, that let the kernel to compile on different platforms with the Mediatek thermal driver enabled with the COMPILE_TEST option. Signed-off-by: Daniel Lezcano Signed-off-by: Philipp Zabel --- include/linux/reset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index a552134a209e..ec0306ce7b92 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -56,6 +56,12 @@ static inline void reset_control_put(struct reset_control *rstc) WARN_ON(1); } +static inline int __must_check device_reset(struct device *dev) +{ + WARN_ON(1); + return -ENOTSUPP; +} + static inline int device_reset_optional(struct device *dev) { return -ENOTSUPP; -- cgit v1.2.3 From 974e0a4537f556867483f493c7f67ccdcb7fc504 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Apr 2016 12:17:09 -0400 Subject: libata-core: Allow longer timeout for drive spinup from PUIS When spinning up a drive from powered on standby mode (PUIS), SETFEATURES_SPINUP is executed with the default timeout used for any SETFEATURES subcommand, that is 5+10 seconds. The total 15s is too short for some drives to complete spinup (e.g. drives with a large indirection table stored on media), resulting in ata_dev_read_id to fail twice on the execution of SETFEATURES_SPINUP. For this feature, allow a larger default timeout of 30 seconds. However, in the same spirit as with the timeout of other feature subcommands, do not ignore ata_probe_timeout if it is set). Signed-off-by: Damien Le Moal Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 6 +++++- include/linux/ata.h | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 55e257c268dd..7b21021dbf7d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4528,6 +4528,7 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature) { struct ata_taskfile tf; unsigned int err_mask; + unsigned long timeout = 0; /* set up set-features taskfile */ DPRINTK("set features - SATA features\n"); @@ -4539,7 +4540,10 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature) tf.protocol = ATA_PROT_NODATA; tf.nsect = feature; - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (enable == SETFEATURES_SPINUP) + timeout = ata_probe_timeout ? + ata_probe_timeout * 1000 : SETFEATURES_SPINUP_TIMEOUT; + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, timeout); DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; diff --git a/include/linux/ata.h b/include/linux/ata.h index c1a2f345cbe6..f310ec0f072e 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -371,7 +371,8 @@ enum { SETFEATURES_AAM_ON = 0x42, SETFEATURES_AAM_OFF = 0xC2, - SETFEATURES_SPINUP = 0x07, /* Spin-up drive */ + SETFEATURES_SPINUP = 0x07, /* Spin-up drive */ + SETFEATURES_SPINUP_TIMEOUT = 30000, /* 30s timeout for drive spin-up from PUIS */ SETFEATURES_SATA_ENABLE = 0x10, /* Enable use of SATA feature */ SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ -- cgit v1.2.3 From 77ed2c5745d93416992857d124f35834b62b3e70 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 8 Mar 2016 20:01:32 +0900 Subject: android,lowmemorykiller: Don't abuse TIF_MEMDIE. Currently, lowmemorykiller (LMK) is using TIF_MEMDIE for two purposes. One is to remember processes killed by LMK, and the other is to accelerate termination of processes killed by LMK. But since LMK is invoked as a memory shrinker function, there still should be some memory available. It is very likely that memory allocations by processes killed by LMK will succeed without using ALLOC_NO_WATERMARKS via TIF_MEMDIE. Even if their allocations cannot escape from memory allocation loop unless they use ALLOC_NO_WATERMARKS, lowmem_deathpending_timeout can guarantee forward progress by choosing next victim process. On the other hand, mark_oom_victim() assumes that it must be called with oom_lock held and it must not be called after oom_killer_disable() was called. But LMK is calling it without holding oom_lock and checking oom_killer_disabled. It is possible that LMK calls mark_oom_victim() due to allocation requests by kernel threads after current thread returned from oom_killer_disabled(). This will break synchronization for PM/suspend. This patch introduces per a task_struct flag for remembering processes killed by LMK, and replaces TIF_MEMDIE with that flag. By applying this patch, assumption by mark_oom_victim() becomes true. Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Arve Hjonnevag Cc: Riley Andrews Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/lowmemorykiller.c | 9 ++------- include/linux/sched.h | 4 ++++ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 2509e5df7244..c79f22425fa8 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -131,7 +131,7 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) if (!p) continue; - if (test_tsk_thread_flag(p, TIF_MEMDIE) && + if (task_lmk_waiting(p) && p->mm && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); rcu_read_unlock(); @@ -162,13 +162,8 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) if (selected) { task_lock(selected); send_sig(SIGKILL, selected, 0); - /* - * FIXME: lowmemorykiller shouldn't abuse global OOM killer - * infrastructure. There is no real reason why the selected - * task should have access to the memory reserves. - */ if (selected->mm) - mark_oom_victim(selected); + task_set_lmk_waiting(selected); task_unlock(selected); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" " to free %ldkB on behalf of '%s' (%d) because\n" diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..9dff190e6a0a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2184,6 +2184,7 @@ static inline void memalloc_noio_restore(unsigned int flags) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ @@ -2207,6 +2208,9 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(LMK_WAITING, lmk_waiting) +TASK_PFA_SET(LMK_WAITING, lmk_waiting) + /* * task->jobctl flags */ -- cgit v1.2.3 From ca065d0cf80fa547724440a8bf37f1e674d917c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Apr 2016 08:52:13 -0700 Subject: udp: no longer use SLAB_DESTROY_BY_RCU Tom Herbert would like not touching UDP socket refcnt for encapsulated traffic. For this to happen, we need to use normal RCU rules, with a grace period before freeing a socket. UDP sockets are not short lived in the high usage case, so the added cost of call_rcu() should not be a concern. This actually removes a lot of complexity in UDP stack. Multicast receives no longer need to hold a bucket spinlock. Note that ip early demux still needs to take a reference on the socket. Same remark for functions used by xt_socket and xt_PROXY netfilter modules, but this might be changed later. Performance for a single UDP socket receiving flood traffic from many RX queues/cpus. Simple udp_rx using simple recvfrom() loop : 438 kpps instead of 374 kpps : 17 % increase of the peak rate. v2: Addressed Willem de Bruijn feedback in multicast handling - keep early demux break in __udp4_lib_demux_lookup() Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Willem de Bruijn Tested-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 8 +- include/net/sock.h | 12 +-- include/net/udp.h | 2 +- net/ipv4/udp.c | 293 ++++++++++++++++------------------------------------ net/ipv4/udp_diag.c | 18 ++-- net/ipv6/udp.c | 196 ++++++++++++----------------------- 6 files changed, 171 insertions(+), 358 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 87c094961bd5..32342754643a 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -#define udp_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry(__sk, list) \ + hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) diff --git a/include/net/sock.h b/include/net/sock.h index 9e77353a92ae..7ad73db9dde2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -178,7 +178,7 @@ struct sock_common { int skc_bound_dev_if; union { struct hlist_node skc_bind_node; - struct hlist_nulls_node skc_portaddr_node; + struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; @@ -670,18 +670,18 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry(__sk, list, sk_bind_node) /** - * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. * */ -#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \ - for (pos = (head)->first; \ - (!is_a_nulls(pos)) && \ +#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ + for (pos = rcu_dereference((head)->first); \ + pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) static inline struct user_namespace *sk_user_ns(struct sock *sk) { diff --git a/include/net/udp.h b/include/net/udp.h index 92927f729ac8..d870ec1611c4 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -59,7 +59,7 @@ struct udp_skb_cb { * @lock: spinlock protecting changes to head/count */ struct udp_hslot { - struct hlist_nulls_head head; + struct hlist_head head; int count; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 45ff590661f4..355bdb221057 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -143,10 +143,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned int log) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); - sk_nulls_for_each(sk2, node, &hslot->head) { + sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (bitmap || udp_sk(sk2)->udp_port_hash == num) && @@ -177,12 +176,11 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, bool match_wildcard)) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); - udp_portaddr_for_each_entry(sk2, node, &hslot2->head) { + udp_portaddr_for_each_entry(sk2, &hslot2->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (udp_sk(sk2)->udp_port_hash == num) && @@ -207,11 +205,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, bool match_wildcard)) { struct net *net = sock_net(sk); - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); struct sock *sk2; - sk_nulls_for_each(sk2, node, &hslot->head) { + sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_family == sk->sk_family && @@ -333,17 +330,18 @@ found: goto fail_unlock; } - sk_nulls_add_node_rcu(sk, &hslot->head); + sk_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock(&hslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, &hslot2->head); hslot2->count++; spin_unlock(&hslot2->lock); } + sock_set_flag(sk, SOCK_RCU_FREE); error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); @@ -497,37 +495,27 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; -begin: result = NULL; badness = 0; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + badness = score; + result = sk; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -535,23 +523,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot2) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score2(result, net, saddr, sport, - daddr, hnum, dif) < badness)) { - sock_put(result); - goto begin; - } - } return result; } @@ -563,15 +534,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; - rcu_read_lock(); if (hslot->count > 10) { hash2 = udp4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -593,35 +561,27 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, htonl(INADDR_ANY), hnum, dif, hslot2, slot2, skb); } - rcu_read_unlock(); return result; } begin: result = NULL; badness = 0; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -629,25 +589,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score(result, net, saddr, hnum, sport, - daddr, dport, dif) < badness)) { - sock_put(result); - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__udp4_lib_lookup); @@ -663,13 +604,24 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, udptable, skb); } +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. + */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table, NULL); + struct sock *sk; + + sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + return sk; } EXPORT_SYMBOL_GPL(udp4_lib_lookup); +#endif static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, @@ -771,7 +723,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk->sk_err = err; sk->sk_error_report(sk); out: - sock_put(sk); + return; } void udp_err(struct sk_buff *skb, u32 info) @@ -1474,13 +1426,13 @@ void udp_lib_unhash(struct sock *sk) spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (sk_nulls_del_node_init_rcu(sk)) { + if (sk_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); } @@ -1513,12 +1465,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) if (hslot2 != nhslot2) { spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); spin_lock(&nhslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, &nhslot2->head); nhslot2->count++; spin_unlock(&nhslot2->lock); @@ -1697,35 +1649,6 @@ drop: return -1; } -static void flush_stack(struct sock **stack, unsigned int count, - struct sk_buff *skb, unsigned int final) -{ - unsigned int i; - struct sk_buff *skb1 = NULL; - struct sock *sk; - - for (i = 0; i < count; i++) { - sk = stack[i]; - if (likely(!skb1)) - skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - - if (!skb1) { - atomic_inc(&sk->sk_drops); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - } - - if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) - skb1 = NULL; - - sock_put(sk); - } - if (unlikely(skb1)) - kfree_skb(skb1); -} - /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ @@ -1749,14 +1672,14 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udp_table *udptable, int proto) { - struct sock *sk, *stack[256 / sizeof(struct sock *)]; - struct hlist_nulls_node *node; + struct sock *sk, *first = NULL; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - int dif = skb->dev->ifindex; - unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - bool inner_flushed = false; + unsigned int offset = offsetof(typeof(*sk), sk_node); + int dif = skb->dev->ifindex; + struct hlist_node *node; + struct sk_buff *nskb; if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -1767,23 +1690,28 @@ start_lookup: offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } - spin_lock(&hslot->lock); - sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { - if (__udp_is_mcast_sock(net, sk, - uh->dest, daddr, - uh->source, saddr, - dif, hnum)) { - if (unlikely(count == ARRAY_SIZE(stack))) { - flush_stack(stack, count, skb, ~0); - inner_flushed = true; - count = 0; - } - stack[count++] = sk; - sock_hold(sk); + sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { + if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, + uh->source, saddr, dif, hnum)) + continue; + + if (!first) { + first = sk; + continue; } - } + nskb = skb_clone(skb, GFP_ATOMIC); - spin_unlock(&hslot->lock); + if (unlikely(!nskb)) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + continue; + } + if (udp_queue_rcv_skb(sk, nskb) > 0) + consume_skb(nskb); + } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { @@ -1791,16 +1719,13 @@ start_lookup: goto start_lookup; } - /* - * do the slow work with no lock held - */ - if (count) { - flush_stack(stack, count, skb, count - 1); + if (first) { + if (udp_queue_rcv_skb(first, skb) > 0) + consume_skb(skb); } else { - if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); - consume_skb(skb); + kfree_skb(skb); + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -1897,7 +1822,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); - sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 @@ -1958,49 +1882,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, int dif) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(loc_port); - unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); + unsigned int slot = udp_hashfn(net, hnum, udp_table.mask); struct udp_hslot *hslot = &udp_table.hash[slot]; /* Do not bother scanning a too big list */ if (hslot->count > 10) return NULL; - rcu_read_lock(); -begin: - count = 0; result = NULL; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { - if (__udp_is_mcast_sock(net, sk, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum)) { + sk_for_each_rcu(sk, &hslot->head) { + if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, + rmt_port, rmt_addr, dif, hnum)) { + if (result) + return NULL; result = sk; - ++count; - } - } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { - if (count != 1 || - unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(!__udp_is_mcast_sock(net, result, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum))) { - sock_put(result); - result = NULL; } } - rcu_read_unlock(); + return result; } @@ -2013,37 +1912,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + struct sock *sk; - rcu_read_lock(); - result = NULL; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { - if (INET_MATCH(sk, net, acookie, - rmt_addr, loc_addr, ports, dif)) - result = sk; + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { + if (INET_MATCH(sk, net, acookie, rmt_addr, + loc_addr, ports, dif)) + return sk; /* Only check first socket in chain */ break; } - - if (result) { - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(!INET_MATCH(sk, net, acookie, - rmt_addr, loc_addr, - ports, dif))) { - sock_put(result); - result = NULL; - } - } - rcu_read_unlock(); - return result; + return NULL; } void udp_v4_early_demux(struct sk_buff *skb) @@ -2051,7 +1935,7 @@ void udp_v4_early_demux(struct sk_buff *skb) struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct udphdr *uh; - struct sock *sk; + struct sock *sk = NULL; struct dst_entry *dst; int dif = skb->dev->ifindex; int ours; @@ -2083,11 +1967,9 @@ void udp_v4_early_demux(struct sk_buff *skb) } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - } else { - return; } - if (!sk) + if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) return; skb->sk = sk; @@ -2387,14 +2269,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) for (state->bucket = start; state->bucket <= state->udp_table->mask; ++state->bucket) { - struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; - if (hlist_nulls_empty(&hslot->head)) + if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); - sk_nulls_for_each(sk, node, &hslot->head) { + sk_for_each(sk, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) @@ -2413,7 +2294,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { @@ -2622,12 +2503,12 @@ void __init udp_table_init(struct udp_table *table, const char *name) table->hash2 = table->hash + (table->mask + 1); for (i = 0; i <= table->mask; i++) { - INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); + INIT_HLIST_HEAD(&table->hash[i].head); table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } for (i = 0; i <= table->mask; i++) { - INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); + INIT_HLIST_HEAD(&table->hash2[i].head); table->hash2[i].count = 0; spin_lock_init(&table->hash2[i].lock); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index df1966f3b6ec..3d5ccf4b1412 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -36,10 +36,11 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { int err = -EINVAL; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); + rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, @@ -54,9 +55,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_dport, req->id.idiag_if, tbl, NULL); #endif - else - goto out_nosk; - + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + rcu_read_unlock(); err = -ENOENT; if (!sk) goto out_nosk; @@ -96,24 +97,23 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { - int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; s_slot = cb->args[0]; num = s_num = cb->args[1]; for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { - struct sock *sk; - struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + struct sock *sk; num = 0; - if (hlist_nulls_empty(&hslot->head)) + if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); - sk_nulls_for_each(sk, node, &hslot->head) { + sk_for_each(sk, &hslot->head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b772a7641fbd..78a7dfd12707 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -213,37 +213,28 @@ static struct sock *udp6_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; -begin: result = NULL; badness = -1; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -251,27 +242,10 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot2) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score2(result, net, saddr, sport, - daddr, hnum, dif) < badness)) { - sock_put(result); - goto begin; - } - } return result; } +/* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -279,15 +253,12 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; - rcu_read_lock(); if (hslot->count > 10) { hash2 = udp6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -309,34 +280,26 @@ struct sock *__udp6_lib_lookup(struct net *net, &in6addr_any, hnum, dif, hslot2, slot2, skb); } - rcu_read_unlock(); return result; } begin: result = NULL; badness = -1; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -344,25 +307,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, saddr, sport, - daddr, dport, dif) < badness)) { - sock_put(result); - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); @@ -382,12 +326,24 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, udptable, skb); } +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. + */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); + struct sock *sk; + + sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + return sk; } EXPORT_SYMBOL_GPL(udp6_lib_lookup); +#endif /* * This should be easy, if there is something there we @@ -585,7 +541,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk->sk_err = err; sk->sk_error_report(sk); out: - sock_put(sk); + return; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -747,33 +703,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, return true; } -static void flush_stack(struct sock **stack, unsigned int count, - struct sk_buff *skb, unsigned int final) -{ - struct sk_buff *skb1 = NULL; - struct sock *sk; - unsigned int i; - - for (i = 0; i < count; i++) { - sk = stack[i]; - if (likely(!skb1)) - skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - if (!skb1) { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - } - - if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) - skb1 = NULL; - sock_put(sk); - } - if (unlikely(skb1)) - kfree_skb(skb1); -} - static void udp6_csum_zero_error(struct sk_buff *skb) { /* RFC 2460 section 8.1 says that we SHOULD log @@ -792,15 +721,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, struct udp_table *udptable, int proto) { - struct sock *sk, *stack[256 / sizeof(struct sock *)]; + struct sock *sk, *first = NULL; const struct udphdr *uh = udp_hdr(skb); - struct hlist_nulls_node *node; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - int dif = inet6_iif(skb); - unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); + unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - bool inner_flushed = false; + int dif = inet6_iif(skb); + struct hlist_node *node; + struct sk_buff *nskb; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -811,27 +740,32 @@ start_lookup: offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } - spin_lock(&hslot->lock); - sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { - if (__udp_v6_is_mcast_sock(net, sk, - uh->dest, daddr, - uh->source, saddr, - dif, hnum) && - /* If zero checksum and no_check is not on for - * the socket then skip it. - */ - (uh->check || udp_sk(sk)->no_check6_rx)) { - if (unlikely(count == ARRAY_SIZE(stack))) { - flush_stack(stack, count, skb, ~0); - inner_flushed = true; - count = 0; - } - stack[count++] = sk; - sock_hold(sk); + sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { + if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, + uh->source, saddr, dif, hnum)) + continue; + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + if (!uh->check && !udp_sk(sk)->no_check6_rx) + continue; + if (!first) { + first = sk; + continue; + } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + continue; } - } - spin_unlock(&hslot->lock); + if (udpv6_queue_rcv_skb(sk, nskb) > 0) + consume_skb(nskb); + } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { @@ -839,13 +773,13 @@ start_lookup: goto start_lookup; } - if (count) { - flush_stack(stack, count, skb, count - 1); + if (first) { + if (udpv6_queue_rcv_skb(first, skb) > 0) + consume_skb(skb); } else { - if (!inner_flushed) - UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); - consume_skb(skb); + kfree_skb(skb); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -853,10 +787,10 @@ start_lookup: int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); - struct sock *sk; struct udphdr *uh; - const struct in6_addr *saddr, *daddr; + struct sock *sk; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -910,7 +844,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int ret; if (!uh->check && !udp_sk(sk)->no_check6_rx) { - sock_put(sk); udp6_csum_zero_error(skb); goto csum_error; } @@ -920,7 +853,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); - sock_put(sk); /* a return value > 0 means to resubmit the input */ if (ret > 0) -- cgit v1.2.3 From 8f6fd83c6c5ec66a4a70c728535ddcdfef4f3697 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 2 Mar 2016 10:09:19 -0500 Subject: rhashtable: accept GFP flags in rhashtable_walk_init In certain cases, the 802.11 mesh pathtable code wants to iterate over all of the entries in the forwarding table from the receive path, which is inside an RCU read-side critical section. Enable walks inside atomic sections by allowing GFP_ATOMIC allocations for the walker state. Change all existing callsites to pass in GFP_KERNEL. Acked-by: Thomas Graf Signed-off-by: Bob Copeland [also adjust gfs2/glock.c and rhashtable tests] Signed-off-by: Johannes Berg --- fs/gfs2/glock.c | 4 ++-- include/linux/rhashtable.h | 3 ++- lib/rhashtable.c | 6 ++++-- lib/test_rhashtable.c | 2 +- net/ipv6/ila/ila_xlat.c | 3 ++- net/netfilter/nft_hash.c | 4 ++-- net/netlink/af_netlink.c | 3 ++- net/sctp/proc.c | 3 ++- 8 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6539131c52a2..4b73bd101bdc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1913,7 +1913,7 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); } return ret; } @@ -1941,7 +1941,7 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); } return ret; } diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 63bd7601b6de..3eef0802a0cd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -346,7 +346,8 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, struct bucket_table *old_tbl); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, + gfp_t gfp); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index cc808707d1cf..5d845ffd7982 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -487,6 +487,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * rhashtable_walk_init - Initialise an iterator * @ht: Table to walk over * @iter: Hash table Iterator + * @gfp: GFP flags for allocations * * This function prepares a hash table walk. * @@ -504,14 +505,15 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * You must call rhashtable_walk_exit if this function returns * successfully. */ -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, + gfp_t gfp) { iter->ht = ht; iter->p = NULL; iter->slot = 0; iter->skip = 0; - iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL); + iter->walker = kmalloc(sizeof(*iter->walker), gfp); if (!iter->walker) return -ENOMEM; diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 270bf7289b1e..297fdb5e74bd 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -143,7 +143,7 @@ static void test_bucket_stats(struct rhashtable *ht) struct rhashtable_iter hti; struct rhash_head *pos; - err = rhashtable_walk_init(ht, &hti); + err = rhashtable_walk_init(ht, &hti, GFP_KERNEL); if (err) { pr_warn("Test failed: allocation error"); return; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 295ca29a23c3..0b03533453e4 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -501,7 +501,8 @@ static int ila_nl_dump_start(struct netlink_callback *cb) struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; - return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, + GFP_KERNEL); } static int ila_nl_dump_done(struct netlink_callback *cb) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3f9d45d3d9b7..6fa016564f90 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -192,7 +192,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; - err = rhashtable_walk_init(&priv->ht, &hti); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); iter->err = err; if (err) return; @@ -248,7 +248,7 @@ static void nft_hash_gc(struct work_struct *work) priv = container_of(work, struct nft_hash, gc_work.work); set = nft_set_container_of(priv); - err = rhashtable_walk_init(&priv->ht, &hti); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); if (err) goto schedule; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 215fc08c02ab..0f16bf635480 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2343,7 +2343,8 @@ static int netlink_walk_start(struct nl_seq_iter *iter) { int err; - err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti, + GFP_KERNEL); if (err) { iter->link = MAX_LINKS; return err; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5cfac8d5d3b3..6d45d53321e6 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -319,7 +319,8 @@ static int sctp_transport_walk_start(struct seq_file *seq) struct sctp_ht_iter *iter = seq->private; int err; - err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti); + err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti, + GFP_KERNEL); if (err) return err; -- cgit v1.2.3 From 4da56b99d99e5a7df2b7f11e87bfea935f909732 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Apr 2016 14:46:42 +0100 Subject: mm/vmap: Add a notifier for when we run out of vmap address space vmaps are temporary kernel mappings that may be of long duration. Reusing a vmap on an object is preferrable for a driver as the cost of setting up the vmap can otherwise dominate the operation on the object. However, the vmap address space is rather limited on 32bit systems and so we add a notification for vmap pressure in order for the driver to release any cached vmappings. The interface is styled after the oom-notifier where the callees are passed a pointer to an unsigned long counter for them to indicate if they have freed any space. v2: Guard the blocking notifier call with gfpflags_allow_blocking() v3: Correct typo in forward declaration and move to head of file Signed-off-by: Chris Wilson Cc: Andrew Morton Cc: David Rientjes Cc: Roman Peniaev Cc: Mel Gorman Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Acked-by: Andrew Morton # for inclusion via DRM Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1459777603-23618-3-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- include/linux/vmalloc.h | 4 ++++ mm/vmalloc.c | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d1f1d338af20..8b51df3ab334 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -8,6 +8,7 @@ #include struct vm_area_struct; /* vma defining user mapping in mm_types.h */ +struct notifier_block; /* in notifier.h */ /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ @@ -187,4 +188,7 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) #define VMALLOC_TOTAL 0UL #endif +int register_vmap_purge_notifier(struct notifier_block *nb); +int unregister_vmap_purge_notifier(struct notifier_block *nb); + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index fb42a5bffe47..12d27ac303ae 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -344,6 +345,8 @@ static void __insert_vmap_area(struct vmap_area *va) static void purge_vmap_area_lazy(void); +static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); + /* * Allocate a region of KVA of the specified size and alignment, within the * vstart and vend. @@ -363,6 +366,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, BUG_ON(offset_in_page(size)); BUG_ON(!is_power_of_2(align)); + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); + va = kmalloc_node(sizeof(struct vmap_area), gfp_mask & GFP_RECLAIM_MASK, node); if (unlikely(!va)) @@ -468,6 +473,16 @@ overflow: purged = 1; goto retry; } + + if (gfpflags_allow_blocking(gfp_mask)) { + unsigned long freed = 0; + blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); + if (freed > 0) { + purged = 0; + goto retry; + } + } + if (printk_ratelimit()) pr_warn("vmap allocation for size %lu failed: " "use vmalloc= to increase size.\n", size); @@ -475,6 +490,18 @@ overflow: return ERR_PTR(-EBUSY); } +int register_vmap_purge_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&vmap_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_vmap_purge_notifier); + +int unregister_vmap_purge_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&vmap_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); + static void __free_vmap_area(struct vmap_area *va) { BUG_ON(RB_EMPTY_NODE(&va->rb_node)); -- cgit v1.2.3 From 3c5bcb2e1930bdbccd14a49660895f014349b51d Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 17 Mar 2016 15:02:53 +0200 Subject: ieee80211: support parsing Fine Timing Measurement action frame Add definition for Fine Timing Measurement (FTM) frame format as defined in IEEE802.11-REVmcD5.0 section 9.6.8.33 Signed-off-by: Avraham Stern Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 3b1f6cef9513..bf9706c5b0bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -7,6 +7,7 @@ * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright (c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1011,6 +1012,16 @@ struct ieee80211_mgmt { u8 tpc_elem_length; struct ieee80211_tpc_report_ie tpc; } __packed tpc_report; + struct { + u8 action_code; + u8 dialog_token; + u8 follow_up; + u8 tod[6]; + u8 toa[6]; + __le16 tod_error; + __le16 toa_error; + u8 variable[0]; + } __packed ftm; } u; } __packed action; } u; -- cgit v1.2.3 From c663e5f56737757db4d0b317c510ab505f93cecb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Mar 2016 10:51:16 +0100 Subject: gpio: support native single-ended hardware drivers Some GPIO controllers has a special hardware bit we can flip to support open drain / source. This means that on these hardwares we do not need to emulate OD/OS by setting the line to input instead of actively driving it high/low. Add an optional vtable callback to the driver set_single_ended() so that driver can implement this in hardware if they have it. We may need a pinctrl_gpio_set_config() call at some point to propagate this down to a backing pin control device on systems with split GPIO/pin control. Reported-by: Michael Hennerich Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 52 ++++++++++++++++++++++++++++++++------------- include/linux/gpio/driver.h | 25 +++++++++++++++++++++- 2 files changed, 61 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 72065532c1c7..1edc830a1b51 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1509,8 +1509,8 @@ EXPORT_SYMBOL_GPL(gpiod_direction_input); static int _gpiod_direction_output_raw(struct gpio_desc *desc, int value) { - struct gpio_chip *chip; - int status = -EINVAL; + struct gpio_chip *gc = desc->gdev->chip; + int ret; /* GPIOs used for IRQs shall not be set as output */ if (test_bit(FLAG_USED_AS_IRQ, &desc->flags)) { @@ -1520,28 +1520,50 @@ static int _gpiod_direction_output_raw(struct gpio_desc *desc, int value) return -EIO; } - /* Open drain pin should not be driven to 1 */ - if (value && test_bit(FLAG_OPEN_DRAIN, &desc->flags)) - return gpiod_direction_input(desc); - - /* Open source pin should not be driven to 0 */ - if (!value && test_bit(FLAG_OPEN_SOURCE, &desc->flags)) - return gpiod_direction_input(desc); + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) { + /* First see if we can enable open drain in hardware */ + if (gc->set_single_ended) { + ret = gc->set_single_ended(gc, gpio_chip_hwgpio(desc), + LINE_MODE_OPEN_DRAIN); + if (!ret) + goto set_output_value; + } + /* Emulate open drain by not actively driving the line high */ + if (value) + return gpiod_direction_input(desc); + } + else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) { + if (gc->set_single_ended) { + ret = gc->set_single_ended(gc, gpio_chip_hwgpio(desc), + LINE_MODE_OPEN_SOURCE); + if (!ret) + goto set_output_value; + } + /* Emulate open source by not actively driving the line low */ + if (!value) + return gpiod_direction_input(desc); + } else { + /* Make sure to disable open drain/source hardware, if any */ + if (gc->set_single_ended) + gc->set_single_ended(gc, + gpio_chip_hwgpio(desc), + LINE_MODE_PUSH_PULL); + } - chip = desc->gdev->chip; - if (!chip->set || !chip->direction_output) { +set_output_value: + if (!gc->set || !gc->direction_output) { gpiod_warn(desc, "%s: missing set() or direction_output() operations\n", __func__); return -EIO; } - status = chip->direction_output(chip, gpio_chip_hwgpio(desc), value); - if (status == 0) + ret = gc->direction_output(gc, gpio_chip_hwgpio(desc), value); + if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); trace_gpio_value(desc_to_gpio(desc), 0, value); - trace_gpio_direction(desc_to_gpio(desc), 0, status); - return status; + trace_gpio_direction(desc_to_gpio(desc), 0, ret); + return ret; } /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index bee976f82788..50882e09289b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -19,6 +19,18 @@ struct gpio_device; #ifdef CONFIG_GPIOLIB +/** + * enum single_ended_mode - mode for single ended operation + * @LINE_MODE_PUSH_PULL: normal mode for a GPIO line, drive actively high/low + * @LINE_MODE_OPEN_DRAIN: set line to be open drain + * @LINE_MODE_OPEN_SOURCE: set line to be open source + */ +enum single_ended_mode { + LINE_MODE_PUSH_PULL, + LINE_MODE_OPEN_DRAIN, + LINE_MODE_OPEN_SOURCE, +}; + /** * struct gpio_chip - abstract a GPIO controller * @label: a functional name for the GPIO device, such as a part @@ -38,7 +50,15 @@ struct gpio_device; * @set: assigns output value for signal "offset" * @set_multiple: assigns output values for multiple signals defined by "mask" * @set_debounce: optional hook for setting debounce time for specified gpio in - * interrupt triggered gpio chips + * interrupt triggered gpio chips + * @set_single_ended: optional hook for setting a line as open drain, open + * source, or non-single ended (restore from open drain/source to normal + * push-pull mode) this should be implemented if the hardware supports + * open drain or open source settings. The GPIOlib will otherwise try + * to emulate open drain/source by not actively driving lines high/low + * if a consumer request this. The driver may return -ENOTSUPP if e.g. + * it supports just open drain but not open source and is called + * with LINE_MODE_OPEN_SOURCE as mode argument. * @to_irq: optional hook supporting non-static gpio_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code @@ -130,6 +150,9 @@ struct gpio_chip { int (*set_debounce)(struct gpio_chip *chip, unsigned offset, unsigned debounce); + int (*set_single_ended)(struct gpio_chip *chip, + unsigned offset, + enum single_ended_mode mode); int (*to_irq)(struct gpio_chip *chip, unsigned offset); -- cgit v1.2.3 From 627d2d6b550094d88f9e518e15967e7bf906ebbf Mon Sep 17 00:00:00 2001 From: samanthakumar Date: Tue, 5 Apr 2016 12:41:16 -0400 Subject: udp: enable MSG_PEEK at non-zero offset Enable peeking at UDP datagrams at the offset specified with socket option SOL_SOCKET/SO_PEEK_OFF. Peek at any datagram in the queue, up to the end of the given datagram. Implement the SO_PEEK_OFF semantics introduced in commit ef64a54f6e55 ("sock: Introduce the SO_PEEK_OFF sock option"). Increase the offset on peek, decrease it on regular reads. When peeking, always checksum the packet immediately, to avoid recomputation on subsequent peeks and final read. The socket lock is not held for the duration of udp_recvmsg, so peek and read operations can run concurrently. Only the last store to sk_peek_off is preserved. Signed-off-by: Sam Kumar Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++++- include/net/sock.h | 2 ++ net/core/datagram.c | 9 ++++++--- net/core/sock.c | 9 +++++++++ net/ipv4/af_inet.c | 1 + net/ipv4/udp.c | 22 +++++++++++----------- net/ipv6/af_inet6.c | 1 + net/ipv6/udp.c | 22 +++++++++++----------- 8 files changed, 47 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 15d0df943466..007381270ff8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); +void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len); +static inline void skb_free_datagram_locked(struct sock *sk, + struct sk_buff *skb) +{ + __skb_free_datagram_locked(sk, skb, 0); +} int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); diff --git a/include/net/sock.h b/include/net/sock.h index b75998952482..1decb7a22261 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -457,6 +457,8 @@ struct sock { #define SK_CAN_REUSE 1 #define SK_FORCE_REUSE 2 +int sk_set_peek_off(struct sock *sk, int val); + static inline int sk_peek_offset(struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { diff --git a/net/core/datagram.c b/net/core/datagram.c index fa9dc6450b08..b7de71f8d5d3 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(skb_free_datagram); -void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) +void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) { bool slow; if (likely(atomic_read(&skb->users) == 1)) smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + else if (likely(!atomic_dec_and_test(&skb->users))) { + sk_peek_offset_bwd(sk, len); return; + } slow = lock_sock_fast(sk); + sk_peek_offset_bwd(sk, len); skb_orphan(skb); sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); @@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) /* skb is now orphaned, can be freed outside of locked section */ __kfree_skb(skb); } -EXPORT_SYMBOL(skb_free_datagram_locked); +EXPORT_SYMBOL(__skb_free_datagram_locked); /** * skb_kill_datagram - Free a datagram skbuff forcibly diff --git a/net/core/sock.c b/net/core/sock.c index e12197b359fd..2ce76e82857f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2187,6 +2187,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount) } EXPORT_SYMBOL(__sk_mem_reclaim); +int sk_set_peek_off(struct sock *sk, int val) +{ + if (val < 0) + return -EINVAL; + + sk->sk_peek_off = val; + return 0; +} +EXPORT_SYMBOL_GPL(sk_set_peek_off); /* * Set of default routines for initialising struct proto_ops when diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e481992dbae..a38b9910af60 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, + .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf747e86ce52..d80312ddbb8a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1294,7 +1294,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, off = 0; + int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -1304,15 +1304,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: + peeking = off = sk_peek_offset(sk, flags); skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &off, &err); if (!skb) - goto out; + return err; ulen = skb->len; copied = len; - if (copied > ulen) - copied = ulen; + if (copied > ulen - off) + copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; @@ -1322,16 +1323,16 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, 0, msg, copied); + err = skb_copy_datagram_msg(skb, off, msg, copied); else { - err = skb_copy_and_csum_datagram_msg(skb, 0, msg); + err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; @@ -1344,7 +1345,8 @@ try_again: UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - goto out_free; + skb_free_datagram_locked(sk, skb); + return err; } if (!peeked) @@ -1368,9 +1370,7 @@ try_again: if (flags & MSG_TRUNC) err = ulen; -out_free: - skb_free_datagram_locked(sk, skb); -out: + __skb_free_datagram_locked(sk, skb, peeking ? -err : err); return err; csum_copy_err: diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b11c37cfd67c..2b78aad0d52f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -561,6 +561,7 @@ const struct proto_ops inet6_dgram_ops = { .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 84c8d7b66820..87bd7aff88b4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -357,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, off = 0; + int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -371,15 +371,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: + peeking = off = sk_peek_offset(sk, flags); skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &off, &err); if (!skb) - goto out; + return err; ulen = skb->len; copied = len; - if (copied > ulen) - copied = ulen; + if (copied > ulen - off) + copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; @@ -391,16 +392,16 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, 0, msg, copied); + err = skb_copy_datagram_msg(skb, off, msg, copied); else { - err = skb_copy_and_csum_datagram_msg(skb, 0, msg); + err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; } @@ -417,7 +418,8 @@ try_again: UDP_MIB_INERRORS, is_udplite); } - goto out_free; + skb_free_datagram_locked(sk, skb); + return err; } if (!peeked) { if (is_udp4) @@ -465,9 +467,7 @@ try_again: if (flags & MSG_TRUNC) err = ulen; -out_free: - skb_free_datagram_locked(sk, skb); -out: + __skb_free_datagram_locked(sk, skb, peeking ? -err : err); return err; csum_copy_err: -- cgit v1.2.3 From f9cd476123ced488e628339becedb2cf3243a58a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 4 Apr 2016 22:44:59 +0200 Subject: dmaengine: pl08x: allocate OF slave channel data at probe time The current OF translation of channels can never work with any DMA client using the DMA channels directly: the only way to get the channels initialized properly is in the dma_async_device_register() call, where chan->dev etc is allocated and initialized. Allocate and initialize all possible DMA channels and only augment a target channel with the periph_buses at of_xlate(). Remove some const settings to make things work. Cc: Maxime Ripard Tested-by: Joachim Eastwood Tested-by: Johannes Stezenbach Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 86 +++++++++++++++++++++++++++++++--------------- include/linux/amba/pl08x.h | 2 +- 2 files changed, 59 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 9b42c0588550..81db1c4811ce 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -107,16 +107,20 @@ struct pl08x_driver_data; /** * struct vendor_data - vendor-specific config parameters for PL08x derivatives * @channels: the number of channels available in this variant + * @signals: the number of request signals available from the hardware * @dualmaster: whether this version supports dual AHB masters or not. * @nomadik: whether the channels have Nomadik security extension bits * that need to be checked for permission before use and some registers are * missing * @pl080s: whether this version is a PL080S, which has separate register and * LLI word for transfer size. + * @max_transfer_size: the maximum single element transfer size for this + * PL08x variant. */ struct vendor_data { u8 config_offset; u8 channels; + u8 signals; bool dualmaster; bool nomadik; bool pl080s; @@ -235,7 +239,7 @@ struct pl08x_dma_chan { struct virt_dma_chan vc; struct pl08x_phy_chan *phychan; const char *name; - const struct pl08x_channel_data *cd; + struct pl08x_channel_data *cd; struct dma_slave_config cfg; struct pl08x_txd *at; struct pl08x_driver_data *host; @@ -1909,6 +1913,12 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, if (slave) { chan->cd = &pl08x->pd->slave_channels[i]; + /* + * Some implementations have muxed signals, whereas some + * use a mux in front of the signals and need dynamic + * assignment of signals. + */ + chan->signal = i; pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; @@ -2050,40 +2060,33 @@ static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct pl08x_driver_data *pl08x = ofdma->of_dma_data; - struct pl08x_channel_data *data; - struct pl08x_dma_chan *chan; struct dma_chan *dma_chan; + struct pl08x_dma_chan *plchan; if (!pl08x) return NULL; - if (dma_spec->args_count != 2) + if (dma_spec->args_count != 2) { + dev_err(&pl08x->adev->dev, + "DMA channel translation requires two cells\n"); return NULL; + } dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); - if (dma_chan) - return dma_get_slave_channel(dma_chan); - - chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), - GFP_KERNEL); - if (!chan) + if (!dma_chan) { + dev_err(&pl08x->adev->dev, + "DMA slave channel not found\n"); return NULL; + } - data = (void *)&chan[1]; - data->bus_id = "(none)"; - data->periph_buses = dma_spec->args[1]; - - chan->cd = data; - chan->host = pl08x; - chan->slave = true; - chan->name = data->bus_id; - chan->state = PL08X_CHAN_IDLE; - chan->signal = dma_spec->args[0]; - chan->vc.desc_free = pl08x_desc_free; - - vchan_init(&chan->vc, &pl08x->slave); + plchan = to_pl08x_chan(dma_chan); + dev_dbg(&pl08x->adev->dev, + "translated channel for signal %d\n", + dma_spec->args[0]); - return dma_get_slave_channel(&chan->vc.chan); + /* Augment channel data for applicable AHB buses */ + plchan->cd->periph_buses = dma_spec->args[1]; + return dma_get_slave_channel(dma_chan); } static int pl08x_of_probe(struct amba_device *adev, @@ -2091,9 +2094,11 @@ static int pl08x_of_probe(struct amba_device *adev, struct device_node *np) { struct pl08x_platform_data *pd; + struct pl08x_channel_data *chanp = NULL; u32 cctl_memcpy = 0; u32 val; int ret; + int i; pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); if (!pd) @@ -2195,6 +2200,27 @@ static int pl08x_of_probe(struct amba_device *adev, /* Use the buses that can access memory, obviously */ pd->memcpy_channel.periph_buses = pd->mem_buses; + /* + * Allocate channel data for all possible slave channels (one + * for each possible signal), channels will then be allocated + * for a device and have it's AHB interfaces set up at + * translation time. + */ + chanp = devm_kcalloc(&adev->dev, + pl08x->vd->signals, + sizeof(struct pl08x_channel_data), + GFP_KERNEL); + if (!chanp) + return -ENOMEM; + + pd->slave_channels = chanp; + for (i = 0; i < pl08x->vd->signals; i++) { + /* chanp->periph_buses will be assigned at translation */ + chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); + chanp++; + } + pd->num_slave_channels = pl08x->vd->signals; + pl08x->pd = pd; return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, @@ -2234,6 +2260,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_pl08x; } + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + /* Initialize memcpy engine */ dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); pl08x->memcpy.dev = &adev->dev; @@ -2284,10 +2314,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) } } - /* Assign useful pointers to the driver state */ - pl08x->adev = adev; - pl08x->vd = vd; - /* By default, AHB1 only. If dualmaster, from platform */ pl08x->lli_buses = PL08X_AHB1; pl08x->mem_buses = PL08X_AHB1; @@ -2438,6 +2464,7 @@ out_no_pl08x: static struct vendor_data vendor_pl080 = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 16, .dualmaster = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2445,6 +2472,7 @@ static struct vendor_data vendor_pl080 = { static struct vendor_data vendor_nomadik = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 32, .dualmaster = true, .nomadik = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, @@ -2453,6 +2481,7 @@ static struct vendor_data vendor_nomadik = { static struct vendor_data vendor_pl080s = { .config_offset = PL080S_CH_CONFIG, .channels = 8, + .signals = 32, .pl080s = true, .max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2460,6 +2489,7 @@ static struct vendor_data vendor_pl080s = { static struct vendor_data vendor_pl081 = { .config_offset = PL080_CH_CONFIG, .channels = 2, + .signals = 16, .dualmaster = false, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 10fe2a211c2e..27e9ec8778eb 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -86,7 +86,7 @@ struct pl08x_channel_data { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - const struct pl08x_channel_data *slave_channels; + struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_xfer_signal)(const struct pl08x_channel_data *); -- cgit v1.2.3 From fa59507f720077a856c9952a31cfd45cd97ef6f9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 30 Mar 2016 12:56:28 +0300 Subject: usb: otg-fsm: Add documentation for struct otg_fsm struct otg_fsm is the interface to the OTG state machine. Document the input, output and internal state variables. Definations are taken from Table 7-2 and Table 7-4 of the USB OTG & EH Specification Rev.2.0 Re-arrange some of the members as per use case for more clarity. Signed-off-by: Roger Quadros Acked-by: Peter Chen Signed-off-by: Peter Chen --- include/linux/usb/otg-fsm.h | 90 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 24198e16f849..8eec0c261be5 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -72,37 +72,113 @@ enum otg_fsm_timer { NUM_OTG_FSM_TIMERS, }; -/* OTG state machine according to the OTG spec */ +/** + * struct otg_fsm - OTG state machine according to the OTG spec + * + * OTG hardware Inputs + * + * Common inputs for A and B device + * @id: TRUE for B-device, FALSE for A-device. + * @adp_change: TRUE when current ADP measurement (n) value, compared to the + * ADP measurement taken at n-2, differs by more than CADP_THR + * @power_up: TRUE when the OTG device first powers up its USB system and + * ADP measurement taken if ADP capable + * + * A-Device state inputs + * @a_srp_det: TRUE if the A-device detects SRP + * @a_vbus_vld: TRUE when VBUS voltage is in regulation + * @b_conn: TRUE if the A-device detects connection from the B-device + * @a_bus_resume: TRUE when the B-device detects that the A-device is signaling + * a resume (K state) + * B-Device state inputs + * @a_bus_suspend: TRUE when the B-device detects that the A-device has put the + * bus into suspend + * @a_conn: TRUE if the B-device detects a connection from the A-device + * @b_se0_srp: TRUE when the line has been at SE0 for more than the minimum + * time before generating SRP + * @b_ssend_srp: TRUE when the VBUS has been below VOTG_SESS_VLD for more than + * the minimum time before generating SRP + * @b_sess_vld: TRUE when the B-device detects that the voltage on VBUS is + * above VOTG_SESS_VLD + * @test_device: TRUE when the B-device switches to B-Host and detects an OTG + * test device. This must be set by host/hub driver + * + * Application inputs (A-Device) + * @a_bus_drop: TRUE when A-device application needs to power down the bus + * @a_bus_req: TRUE when A-device application wants to use the bus. + * FALSE to suspend the bus + * + * Application inputs (B-Device) + * @b_bus_req: TRUE during the time that the Application running on the + * B-device wants to use the bus + * + * Auxilary inputs (OTG v1.3 only. Obsolete now.) + * @a_sess_vld: TRUE if the A-device detects that VBUS is above VA_SESS_VLD + * @b_bus_suspend: TRUE when the A-device detects that the B-device has put + * the bus into suspend + * @b_bus_resume: TRUE when the A-device detects that the B-device is signaling + * resume on the bus + * + * OTG Output status. Read only for users. Updated by OTG FSM helpers defined + * in this file + * + * Outputs for Both A and B device + * @drv_vbus: TRUE when A-device is driving VBUS + * @loc_conn: TRUE when the local device has signaled that it is connected + * to the bus + * @loc_sof: TRUE when the local device is generating activity on the bus + * @adp_prb: TRUE when the local device is in the process of doing + * ADP probing + * + * Outputs for B-device state + * @adp_sns: TRUE when the B-device is in the process of carrying out + * ADP sensing + * @data_pulse: TRUE when the B-device is performing data line pulsing + * + * Internal Variables + * + * a_set_b_hnp_en: TRUE when the A-device has successfully set the + * b_hnp_enable bit in the B-device. + * Unused as OTG fsm uses otg->host->b_hnp_enable instead + * b_srp_done: TRUE when the B-device has completed initiating SRP + * b_hnp_enable: TRUE when the B-device has accepted the + * SetFeature(b_hnp_enable) B-device. + * Unused as OTG fsm uses otg->gadget->b_hnp_enable instead + * a_clr_err: Asserted (by application ?) to clear a_vbus_err due to an + * overcurrent condition and causes the A-device to transition + * to a_wait_vfall + */ struct otg_fsm { /* Input */ int id; int adp_change; int power_up; - int test_device; - int a_bus_drop; - int a_bus_req; int a_srp_det; int a_vbus_vld; int b_conn; int a_bus_resume; int a_bus_suspend; int a_conn; - int b_bus_req; int b_se0_srp; int b_ssend_srp; int b_sess_vld; + int test_device; + int a_bus_drop; + int a_bus_req; + int b_bus_req; + /* Auxilary inputs */ int a_sess_vld; int b_bus_resume; int b_bus_suspend; /* Output */ - int data_pulse; int drv_vbus; int loc_conn; int loc_sof; int adp_prb; int adp_sns; + int data_pulse; /* Internal variables */ int a_set_b_hnp_en; @@ -110,7 +186,7 @@ struct otg_fsm { int b_hnp_enable; int a_clr_err; - /* Informative variables */ + /* Informative variables. All unused as of now */ int a_bus_drop_inf; int a_bus_req_inf; int a_clr_err_inf; -- cgit v1.2.3 From 4e332df63487418ec512c3c376c07df9ab3ae035 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 30 Mar 2016 12:56:29 +0300 Subject: usb: otg-fsm: support multiple instances Move the state_changed variable into struct otg_fsm so that we can support multiple instances. Signed-off-by: Roger Quadros Acked-by: Peter Chen Signed-off-by: Peter Chen --- drivers/usb/common/usb-otg-fsm.c | 10 ++++------ include/linux/usb/otg-fsm.h | 1 + 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 504708f59b93..9059b7dc185e 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -61,8 +61,6 @@ static int otg_set_protocol(struct otg_fsm *fsm, int protocol) return 0; } -static int state_changed; - /* Called when leaving a state. Do state clean up jobs here */ static void otg_leave_state(struct otg_fsm *fsm, enum usb_otg_state old_state) { @@ -208,7 +206,6 @@ static void otg_start_hnp_polling(struct otg_fsm *fsm) /* Called when entering a state */ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) { - state_changed = 1; if (fsm->otg->state == new_state) return 0; VDBG("Set state: %s\n", usb_otg_state_string(new_state)); @@ -324,6 +321,7 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) } fsm->otg->state = new_state; + fsm->state_changed = 1; return 0; } @@ -335,7 +333,7 @@ int otg_statemachine(struct otg_fsm *fsm) mutex_lock(&fsm->lock); state = fsm->otg->state; - state_changed = 0; + fsm->state_changed = 0; /* State machine state change judgement */ switch (state) { @@ -448,7 +446,7 @@ int otg_statemachine(struct otg_fsm *fsm) } mutex_unlock(&fsm->lock); - VDBG("quit statemachine, changed = %d\n", state_changed); - return state_changed; + VDBG("quit statemachine, changed = %d\n", fsm->state_changed); + return fsm->state_changed; } EXPORT_SYMBOL_GPL(otg_statemachine); diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 8eec0c261be5..7a0350535cb1 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -210,6 +210,7 @@ struct otg_fsm { struct mutex lock; u8 *host_req_flag; struct delayed_work hnp_polling_work; + bool state_changed; }; struct otg_fsm_ops { -- cgit v1.2.3 From 49ddf8e6e2347cffdcf83d1ca2d04ff929820178 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 31 Mar 2016 20:02:10 +0300 Subject: mac80211: add fast-rx path The regular RX path has a lot of code, but with a few assumptions on the hardware it's possible to reduce the amount of code significantly. Currently the assumptions on the driver are the following: * hardware/driver reordering buffer (if supporting aggregation) * hardware/driver decryption & PN checking (if using encryption) * hardware/driver did de-duplication * hardware/driver did A-MSDU deaggregation * AP_LINK_PS is used (in AP mode) * no client powersave handling in mac80211 (in client mode) of which some are actually checked per packet: * de-duplication * PN checking * decryption and additionally packets must * not be A-MSDU (have been deaggregated by driver/device) * be data packets * not be fragmented * be unicast * have RFC 1042 header Additionally dynamically we assume: * no encryption or CCMP/GCMP, TKIP/WEP/other not allowed * station must be authorized * 4-addr format not enabled Some data needed for the RX path is cached in a new per-station "fast_rx" structure, so that we only need to look at this and the packet, no other memory when processing packets on the fast RX path. After doing the above per-packet checks, the data path collapses down to a pretty simple conversion function taking advantage of the data cached in the small fast_rx struct. This should speed up the RX processing, and will make it easier to reason about parallelizing RX (for which statistics will need to be per-CPU still.) Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++ net/mac80211/cfg.c | 10 +- net/mac80211/ieee80211_i.h | 5 + net/mac80211/key.c | 1 + net/mac80211/mlme.c | 9 ++ net/mac80211/rx.c | 351 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/sta_info.c | 2 + net/mac80211/sta_info.h | 34 +++++ 8 files changed, 419 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bf9706c5b0bd..113bfc468a4d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -638,6 +638,16 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } +/** + * ieee80211_is_frag - check if a frame is a fragment + * @hdr: 802.11 header of the frame + */ +static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) +{ + return ieee80211_has_morefrags(hdr->frame_control) || + hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 62a90f270f03..fc4730b938d0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -65,11 +65,13 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return ret; if (type == NL80211_IFTYPE_AP_VLAN && - params && params->use_4addr == 0) + params && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); - else if (type == NL80211_IFTYPE_STATION && - params && params->use_4addr >= 0) + ieee80211_check_fast_rx_iface(sdata); + } else if (type == NL80211_IFTYPE_STATION && + params && params->use_4addr >= 0) { sdata->u.mgd.use_4addr = params->use_4addr; + } if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) { struct ieee80211_local *local = sdata->local; @@ -1367,6 +1369,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, rcu_assign_pointer(vlansdata->u.vlan.sta, sta); new_4addr = true; + __ieee80211_check_fast_rx_iface(vlansdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && @@ -1889,6 +1892,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c8945e2d8a86..6243109979ed 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1494,6 +1494,11 @@ u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp); +void ieee80211_check_fast_rx(struct sta_info *sta); +void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_clear_fast_rx(struct sta_info *sta); + /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 3df7b0392d30..edd6f2945f69 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -338,6 +338,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } else { rcu_assign_pointer(sta->gtk[idx], new); } + ieee80211_check_fast_rx(sta); } else { defunikey = old && old == key_mtx_dereference(sdata->local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2112df4ffb7b..d3c75ac8a029 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2217,6 +2217,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) const u8 *ssid; u8 *dst = ifmgd->associated->bssid; u8 unicast_limit = max(1, max_probe_tries - 3); + struct sta_info *sta; /* * Try sending broadcast probe requests for the last three @@ -2235,6 +2236,14 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) */ ifmgd->probe_send_count++; + if (dst) { + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get(sdata, dst); + if (!WARN_ON(!sta)) + ieee80211_check_fast_rx(sta); + mutex_unlock(&sdata->local->sta_mtx); + } + if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; ieee80211_send_nullfunc(sdata->local, sdata, false); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2863832b0db4..96f8bbf21649 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3508,6 +3508,342 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; } +void ieee80211_check_fast_rx(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_key *key; + struct ieee80211_fast_rx fastrx = { + .dev = sdata->dev, + .vif_type = sdata->vif.type, + .control_port_protocol = sdata->control_port_protocol, + }, *old, *new = NULL; + bool assign = false; + + /* use sparse to check that we don't return without updating */ + __acquire(check_fast_rx); + + BUILD_BUG_ON(sizeof(fastrx.rfc1042_hdr) != sizeof(rfc1042_header)); + BUILD_BUG_ON(sizeof(fastrx.rfc1042_hdr) != ETH_ALEN); + ether_addr_copy(fastrx.rfc1042_hdr, rfc1042_header); + ether_addr_copy(fastrx.vif_addr, sdata->vif.addr); + + /* fast-rx doesn't do reordering */ + if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) && + !ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) + goto clear; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + /* 4-addr is harder to deal with, later maybe */ + if (sdata->u.mgd.use_4addr) + goto clear; + /* software powersave is a huge mess, avoid all of it */ + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) + goto clear; + if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) && + !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) + goto clear; + if (sta->sta.tdls) { + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); + fastrx.expected_ds_bits = 0; + } else { + fastrx.sta_notify = sdata->u.mgd.probe_send_count > 0; + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.expected_ds_bits = + cpu_to_le16(IEEE80211_FCTL_FROMDS); + } + break; + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP: + /* parallel-rx requires this, at least with calls to + * ieee80211_sta_ps_transition() + */ + if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) + goto clear; + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); + fastrx.expected_ds_bits = cpu_to_le16(IEEE80211_FCTL_TODS); + + fastrx.internal_forward = + !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || + !sdata->u.vlan.sta); + break; + default: + goto clear; + } + + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + goto clear; + + rcu_read_lock(); + key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (key) { + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: + /* we don't want to deal with MMIC in fast-rx */ + goto clear_rcu; + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + break; + default: + /* we also don't want to deal with WEP or cipher scheme + * since those require looking up the key idx in the + * frame, rather than assuming the PTK is used + * (we need to revisit this once we implement the real + * PTK index, which is now valid in the spec, but we + * haven't implemented that part yet) + */ + goto clear_rcu; + } + + fastrx.key = true; + fastrx.icv_len = key->conf.icv_len; + } + + assign = true; + clear_rcu: + rcu_read_unlock(); + clear: + __release(check_fast_rx); + + if (assign) + new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL); + + spin_lock_bh(&sta->lock); + old = rcu_dereference_protected(sta->fast_rx, true); + rcu_assign_pointer(sta->fast_rx, new); + spin_unlock_bh(&sta->lock); + + if (old) + kfree_rcu(old, rcu_head); +} + +void ieee80211_clear_fast_rx(struct sta_info *sta) +{ + struct ieee80211_fast_rx *old; + + spin_lock_bh(&sta->lock); + old = rcu_dereference_protected(sta->fast_rx, true); + RCU_INIT_POINTER(sta->fast_rx, NULL); + spin_unlock_bh(&sta->lock); + + if (old) + kfree_rcu(old, rcu_head); +} + +void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + lockdep_assert_held(&local->sta_mtx); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata && + (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) + continue; + ieee80211_check_fast_rx(sta); + } +} + +void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->sta_mtx); + __ieee80211_check_fast_rx_iface(sdata); + mutex_unlock(&local->sta_mtx); +} + +static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, + struct ieee80211_fast_rx *fast_rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct sta_info *sta = rx->sta; + int orig_len = skb->len; + int snap_offs = ieee80211_hdrlen(hdr->frame_control); + struct { + u8 snap[sizeof(rfc1042_header)]; + __be16 proto; + } *payload __aligned(2); + struct { + u8 da[ETH_ALEN]; + u8 sa[ETH_ALEN]; + } addrs __aligned(2); + + /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write + * to a common data structure; drivers can implement that per queue + * but we don't have that information in mac80211 + */ + if (!(status->flag & RX_FLAG_DUP_VALIDATED)) + return false; + +#define FAST_RX_CRYPT_FLAGS (RX_FLAG_PN_VALIDATED | RX_FLAG_DECRYPTED) + + /* If using encryption, we also need to have: + * - PN_VALIDATED: similar, but the implementation is tricky + * - DECRYPTED: necessary for PN_VALIDATED + */ + if (fast_rx->key && + (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS) + return false; + + /* we don't deal with A-MSDU deaggregation here */ + if (status->rx_flags & IEEE80211_RX_AMSDU) + return false; + + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) + return false; + + if (unlikely(ieee80211_is_frag(hdr))) + return false; + + /* Since our interface address cannot be multicast, this + * implicitly also rejects multicast frames without the + * explicit check. + * + * We shouldn't get any *data* frames not addressed to us + * (AP mode will accept multicast *management* frames), but + * punting here will make it go through the full checks in + * ieee80211_accept_frame(). + */ + if (!ether_addr_equal(fast_rx->vif_addr, hdr->addr1)) + return false; + + if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS)) != + fast_rx->expected_ds_bits) + goto drop; + + /* assign the key to drop unencrypted frames (later) + * and strip the IV/MIC if necessary + */ + if (fast_rx->key && !(status->flag & RX_FLAG_IV_STRIPPED)) { + /* GCMP header length is the same */ + snap_offs += IEEE80211_CCMP_HDR_LEN; + } + + if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) + goto drop; + payload = (void *)(skb->data + snap_offs); + + if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr)) + return false; + + /* Don't handle these here since they require special code. + * Accept AARP and IPX even though they should come with a + * bridge-tunnel header - but if we get them this way then + * there's little point in discarding them. + */ + if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) || + payload->proto == fast_rx->control_port_protocol)) + return false; + + /* after this point, don't punt to the slowpath! */ + + if (rx->key && !(status->flag & RX_FLAG_MIC_STRIPPED) && + pskb_trim(skb, skb->len - fast_rx->icv_len)) + goto drop; + + if (unlikely(fast_rx->sta_notify)) { + ieee80211_sta_rx_notify(rx->sdata, hdr); + fast_rx->sta_notify = false; + } + + /* statistics part of ieee80211_rx_h_sta_process() */ + sta->rx_stats.last_rx = jiffies; + sta->rx_stats.last_rate = sta_stats_encode_rate(status); + + sta->rx_stats.fragments++; + + if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { + sta->rx_stats.last_signal = status->signal; + ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal); + } + + if (status->chains) { + int i; + + sta->rx_stats.chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + sta->rx_stats.chain_signal_last[i] = signal; + ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + -signal); + } + } + /* end of statistics */ + + if (rx->key && !ieee80211_has_protected(hdr->frame_control)) + goto drop; + + /* do the header conversion - first grab the addresses */ + ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs); + ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs); + /* remove the SNAP but leave the ethertype */ + skb_pull(skb, snap_offs + sizeof(rfc1042_header)); + /* push the addresses in front */ + memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); + + skb->dev = fast_rx->dev; + + ieee80211_rx_stats(fast_rx->dev, skb->len); + + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + u64_stats_update_begin(&sta->rx_stats.syncp); + sta->rx_stats.msdu[rx->seqno_idx]++; + sta->rx_stats.bytes += orig_len; + u64_stats_update_end(&sta->rx_stats.syncp); + + if (fast_rx->internal_forward) { + struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); + + if (dsta) { + /* + * Send to wireless media and increase priority by 256 + * to keep the received priority instead of + * reclassifying the frame (see cfg80211_classify8021d). + */ + skb->priority += 256; + skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + dev_queue_xmit(skb); + return true; + } + } + + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, fast_rx->dev); + memset(skb->cb, 0, sizeof(skb->cb)); + if (rx->napi) + napi_gro_receive(rx->napi, skb); + else + netif_receive_skb(skb); + + return true; + drop: + dev_kfree_skb(skb); + sta->rx_stats.dropped++; + return true; +} + /* * This function returns whether or not the SKB * was destined for RX processing or not, which, @@ -3522,6 +3858,21 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, rx->skb = skb; + /* See if we can do fast-rx; if we have to copy we already lost, + * so punt in that case. We should never have to deliver a data + * frame to multiple interfaces anyway. + * + * We skip the ieee80211_accept_frame() call and do the necessary + * checking inside ieee80211_invoke_fast_rx(). + */ + if (consume && rx->sta) { + struct ieee80211_fast_rx *fast_rx; + + fast_rx = rcu_dereference(rx->sta->fast_rx); + if (fast_rx && ieee80211_invoke_fast_rx(rx, fast_rx)) + return true; + } + if (!ieee80211_accept_frame(rx)) return false; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index bdd303e8b577..a0ce7e40f420 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1874,6 +1874,7 @@ int sta_info_move_state(struct sta_info *sta, atomic_dec(&sta->sdata->bss->num_mcast_sta); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_clear_fast_xmit(sta); + ieee80211_clear_fast_rx(sta); } break; case IEEE80211_STA_AUTHORIZED: @@ -1884,6 +1885,7 @@ int sta_info_move_state(struct sta_info *sta, atomic_inc(&sta->sdata->bss->num_mcast_sta); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); + ieee80211_check_fast_rx(sta); } break; default: diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 7c23b575672e..a0a06609338d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -285,6 +285,38 @@ struct ieee80211_fast_tx { struct rcu_head rcu_head; }; +/** + * struct ieee80211_fast_rx - RX fastpath information + * @dev: netdevice for reporting the SKB + * @vif_type: (P2P-less) interface type of the original sdata (sdata->vif.type) + * @vif_addr: interface address + * @rfc1042_hdr: copy of the RFC 1042 SNAP header (to have in cache) + * @control_port_protocol: control port protocol copied from sdata + * @expected_ds_bits: from/to DS bits expected + * @icv_len: length of the MIC if present + * @key: bool indicating encryption is expected (key is set) + * @sta_notify: notify the MLME code (once) + * @internal_forward: forward froms internally on AP/VLAN type interfaces + * @da_offs: offset of the DA in the header (for header conversion) + * @sa_offs: offset of the SA in the header (for header conversion) + * @rcu_head: RCU head for freeing this structure + */ +struct ieee80211_fast_rx { + struct net_device *dev; + enum nl80211_iftype vif_type; + u8 vif_addr[ETH_ALEN] __aligned(2); + u8 rfc1042_hdr[6] __aligned(2); + __be16 control_port_protocol; + __le16 expected_ds_bits; + u8 icv_len; + u8 key:1, + sta_notify:1, + internal_forward:1; + u8 da_offs, sa_offs; + + struct rcu_head rcu_head; +}; + /** * struct mesh_sta - mesh STA information * @plink_lock: serialize access to plink fields @@ -391,6 +423,7 @@ DECLARE_EWMA(signal, 1024, 8) * @cipher_scheme: optional cipher scheme for this station * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @fast_tx: TX fastpath information + * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. * @tx_stats: TX statistics @@ -414,6 +447,7 @@ struct sta_info { spinlock_t lock; struct ieee80211_fast_tx __rcu *fast_tx; + struct ieee80211_fast_rx __rcu *fast_rx; #ifdef CONFIG_MAC80211_MESH struct mesh_sta *mesh; -- cgit v1.2.3 From 6e0456b5454561c4e9fa9e8a4acea405e6d56c80 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 3 Mar 2016 22:59:00 +0100 Subject: mac80211: add A-MSDU tx support Requires software tx queueing and fast-xmit support. For good performance, drivers need frag_list support as well. This avoids the need for copying data of aggregated frames. Running without it is only supported for debugging purposes. To avoid performance and packet size issues, the rate control module or driver needs to limit the maximum A-MSDU size by setting max_rc_amsdu_len in struct ieee80211_sta. Signed-off-by: Felix Fietkau [fix locking issue] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 + include/net/mac80211.h | 19 ++++++ net/mac80211/agg-tx.c | 5 ++ net/mac80211/debugfs.c | 2 + net/mac80211/ieee80211_i.h | 1 + net/mac80211/sta_info.c | 2 + net/mac80211/tx.c | 156 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 188 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 113bfc468a4d..b118744d3382 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -164,6 +164,9 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 +/* Maximal size of an A-MSDU that can be transported in a HT BA session */ +#define IEEE80211_MAX_MPDU_LEN_HT_BA 4095 + /* Maximal size of an A-MSDU */ #define IEEE80211_MAX_MPDU_LEN_HT_3839 3839 #define IEEE80211_MAX_MPDU_LEN_HT_7935 7935 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5f4b4c773a92..a3ee76559791 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -713,6 +713,7 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll * frame (PS-Poll or uAPSD). * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information + * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame * * These flags are used in tx_info->control.flags. */ @@ -720,6 +721,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0), IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), + IEEE80211_TX_CTRL_AMSDU = BIT(3), }; /* @@ -1746,6 +1748,7 @@ struct ieee80211_sta_rates { * Both additional HT limits must be enforced by the low level driver. * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2). * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { @@ -1767,6 +1770,7 @@ struct ieee80211_sta { u8 max_amsdu_subframes; u16 max_amsdu_len; bool support_p2p_ps; + u16 max_rc_amsdu_len; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; @@ -1983,6 +1987,15 @@ struct ieee80211_txq { * @IEEE80211_HW_USES_RSS: The device uses RSS and thus requires parallel RX, * which implies using per-CPU station statistics. * + * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated + * A-MSDU frames. Requires software tx queueing and fast-xmit support. + * When not using minstrel/minstrel_ht rate control, the driver must + * limit the maximum A-MSDU size based on the current tx rate by setting + * max_rc_amsdu_len in struct ieee80211_sta. + * + * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list + * skbs, needed for zero-copy software A-MSDU. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2021,6 +2034,8 @@ enum ieee80211_hw_flags { IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, IEEE80211_HW_SUPPORTS_REORDERING_BUFFER, IEEE80211_HW_USES_RSS, + IEEE80211_HW_TX_AMSDU, + IEEE80211_HW_TX_FRAG_LIST, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2093,6 +2108,9 @@ enum ieee80211_hw_flags { * size is smaller (an example is LinkSys WRT120N with FW v1.0.07 * build 002 Jun 18 2012). * + * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum + * of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list. + * * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX * (if %IEEE80211_HW_QUEUE_CONTROL is set) * @@ -2147,6 +2165,7 @@ struct ieee80211_hw { u8 max_rate_tries; u8 max_rx_aggregation_subframes; u8 max_tx_aggregation_subframes; + u8 max_tx_fragments; u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 4932e9f243a2..42fa81031dfa 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, size_t len) { struct tid_ampdu_tx *tid_tx; + struct ieee80211_txq *txq; u16 capab, tid; u8 buf_size; bool amsdu; @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); + txq = sta->sta.txq[tid]; + if (!amsdu && txq) + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); + mutex_lock(&sta->ampdu_mlme.mtx); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 52ed2afc408d..b251b2f7f8dd 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -128,6 +128,8 @@ static const char *hw_flag_names[] = { FLAG(NEEDS_UNIQUE_STA_ADDR), FLAG(SUPPORTS_REORDERING_BUFFER), FLAG(USES_RSS), + FLAG(TX_AMSDU), + FLAG(TX_FRAG_LIST), #undef FLAG }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6243109979ed..40c1d343992c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -802,6 +802,7 @@ struct mac80211_qos_map { enum txq_info_flags { IEEE80211_TXQ_STOP, IEEE80211_TXQ_AMPDU, + IEEE80211_TXQ_NO_AMSDU, }; struct txq_info { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cf2aca0cc200..960e13d8ed30 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -416,6 +416,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } + sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 597c8fe672a3..4fa2842ddb25 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1324,6 +1324,10 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, out: spin_unlock_bh(&txqi->queue.lock); + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) + skb_linearize(skb); + return skb; } EXPORT_SYMBOL(ieee80211_tx_dequeue); @@ -2802,6 +2806,154 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta) kfree_rcu(fast_tx, rcu_head); } +static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local, + struct sk_buff *skb, int headroom, + int *subframe_len) +{ + int amsdu_len = *subframe_len + sizeof(struct ethhdr); + int padding = (4 - amsdu_len) & 3; + + if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) { + I802_DEBUG_INC(local->tx_expand_skb_head); + + if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) { + wiphy_debug(local->hw.wiphy, + "failed to reallocate TX buffer\n"); + return false; + } + } + + if (padding) { + *subframe_len += padding; + memset(skb_put(skb, padding), 0, padding); + } + + return true; +} + +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr; + struct ethhdr amsdu_hdr; + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); + int subframe_len = skb->len - hdr_len; + void *data; + u8 *qc; + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) + return false; + + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) + return true; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), + &subframe_len)) + return false; + + amsdu_hdr.h_proto = cpu_to_be16(subframe_len); + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); + memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN); + + data = skb_push(skb, sizeof(amsdu_hdr)); + memmove(data, data + sizeof(amsdu_hdr), hdr_len); + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); + + hdr = data; + qc = ieee80211_get_qos_ctl(hdr); + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + info->control.flags |= IEEE80211_TX_CTRL_AMSDU; + + return true; +} + +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + struct sk_buff **frag_tail, *head; + int subframe_len = skb->len - ETH_ALEN; + u8 max_subframes = sta->sta.max_amsdu_subframes; + int max_frags = local->hw.max_tx_fragments; + int max_amsdu_len = sta->sta.max_amsdu_len; + __be16 len; + void *data; + bool ret = false; + int n = 1, nfrags; + + if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) + return false; + + if (!txq) + return false; + + txqi = to_txq_info(txq); + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags)) + return false; + + if (sta->sta.max_rc_amsdu_len) + max_amsdu_len = min_t(int, max_amsdu_len, + sta->sta.max_rc_amsdu_len); + + spin_lock_bh(&txqi->queue.lock); + + head = skb_peek_tail(&txqi->queue); + if (!head) + goto out; + + if (skb->len + head->len > max_amsdu_len) + goto out; + + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + + nfrags = 1 + skb_shinfo(skb)->nr_frags; + nfrags += 1 + skb_shinfo(head)->nr_frags; + frag_tail = &skb_shinfo(head)->frag_list; + while (*frag_tail) { + nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags; + frag_tail = &(*frag_tail)->next; + n++; + } + + if (max_subframes && n > max_subframes) + goto out; + + if (max_frags && nfrags > max_frags) + goto out; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2, + &subframe_len)) + goto out; + + ret = true; + data = skb_push(skb, ETH_ALEN + 2); + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN); + + data += 2 * ETH_ALEN; + len = cpu_to_be16(subframe_len); + memcpy(data, &len, 2); + memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header)); + + head->len += skb->len; + head->data_len += skb->len; + *frag_tail = skb; + +out: + spin_unlock_bh(&txqi->queue.lock); + + return ret; +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, @@ -2856,6 +3008,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, ieee80211_tx_stats(dev, skb->len + extra_head); + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) + return true; + /* will not be crypto-handled beyond what we do here, so use false * as the may-encrypt argument for the resize to not account for * more room than we already have in 'extra_head' -- cgit v1.2.3 From e68503bd6836ba765dc8e0ee77ea675fedc07e41 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:24 +0100 Subject: KEYS: Generalise system_verify_data() to provide access to internal content Generalise system_verify_data() to provide access to internal content through a callback. This allows all the PKCS#7 stuff to be hidden inside this function and removed from the PE file parser and the PKCS#7 test key. If external content is not required, NULL should be passed as data to the function. If the callback is not required, that can be set to NULL. The function is now called verify_pkcs7_signature() to contrast with verify_pefile_signature() and the definitions of both have been moved into linux/verification.h along with the key_being_used_for enum. Signed-off-by: David Howells --- arch/x86/kernel/kexec-bzimage64.c | 18 ++------- certs/system_keyring.c | 45 ++++++++++++++++----- crypto/asymmetric_keys/Kconfig | 4 +- crypto/asymmetric_keys/mscode_parser.c | 21 ++++------ crypto/asymmetric_keys/pkcs7_key_type.c | 72 +++++++++++++-------------------- crypto/asymmetric_keys/pkcs7_parser.c | 21 +++++----- crypto/asymmetric_keys/verify_pefile.c | 40 +++++------------- crypto/asymmetric_keys/verify_pefile.h | 5 +-- include/crypto/pkcs7.h | 3 +- include/crypto/public_key.h | 14 ------- include/keys/asymmetric-type.h | 1 + include/keys/system_keyring.h | 7 +--- include/linux/verification.h | 50 +++++++++++++++++++++++ include/linux/verify_pefile.h | 22 ---------- kernel/module_signing.c | 5 ++- 15 files changed, 155 insertions(+), 173 deletions(-) create mode 100644 include/linux/verification.h delete mode 100644 include/linux/verify_pefile.h (limited to 'include/linux') diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 2af478e3fd4e..f2356bda2b05 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -19,8 +19,7 @@ #include #include #include -#include -#include +#include #include #include @@ -529,18 +528,9 @@ static int bzImage64_cleanup(void *loader_data) #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) { - bool trusted; - int ret; - - ret = verify_pefile_signature(kernel, kernel_len, - system_trusted_keyring, - VERIFYING_KEXEC_PE_SIGNATURE, - &trusted); - if (ret < 0) - return ret; - if (!trusted) - return -EKEYREJECTED; - return 0; + return verify_pefile_signature(kernel, kernel_len, + NULL, + VERIFYING_KEXEC_PE_SIGNATURE); } #endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index f4180326c2e1..a83bffedc0aa 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -108,16 +108,25 @@ late_initcall(load_system_certificate_list); #ifdef CONFIG_SYSTEM_DATA_VERIFICATION /** - * Verify a PKCS#7-based signature on system data. - * @data: The data to be verified. + * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data. + * @data: The data to be verified (NULL if expecting internal data). * @len: Size of @data. * @raw_pkcs7: The PKCS#7 message that is the signature. * @pkcs7_len: The size of @raw_pkcs7. + * @trusted_keys: Trusted keys to use (NULL for system_trusted_keyring). * @usage: The use to which the key is being put. + * @view_content: Callback to gain access to content. + * @ctx: Context for callback. */ -int system_verify_data(const void *data, unsigned long len, - const void *raw_pkcs7, size_t pkcs7_len, - enum key_being_used_for usage) +int verify_pkcs7_signature(const void *data, size_t len, + const void *raw_pkcs7, size_t pkcs7_len, + struct key *trusted_keys, + int untrusted_error, + enum key_being_used_for usage, + int (*view_content)(void *ctx, + const void *data, size_t len, + size_t asn1hdrlen), + void *ctx) { struct pkcs7_message *pkcs7; bool trusted; @@ -128,7 +137,7 @@ int system_verify_data(const void *data, unsigned long len, return PTR_ERR(pkcs7); /* The data should be detached - so we need to supply it. */ - if (pkcs7_supply_detached_data(pkcs7, data, len) < 0) { + if (data && pkcs7_supply_detached_data(pkcs7, data, len) < 0) { pr_err("PKCS#7 signature with non-detached data\n"); ret = -EBADMSG; goto error; @@ -138,13 +147,29 @@ int system_verify_data(const void *data, unsigned long len, if (ret < 0) goto error; - ret = pkcs7_validate_trust(pkcs7, system_trusted_keyring, &trusted); + if (!trusted_keys) + trusted_keys = system_trusted_keyring; + ret = pkcs7_validate_trust(pkcs7, trusted_keys, &trusted); if (ret < 0) goto error; - if (!trusted) { + if (!trusted && untrusted_error) { pr_err("PKCS#7 signature not signed with a trusted key\n"); - ret = -ENOKEY; + ret = untrusted_error; + goto error; + } + + if (view_content) { + size_t asn1hdrlen; + + ret = pkcs7_get_content_data(pkcs7, &data, &len, &asn1hdrlen); + if (ret < 0) { + if (ret == -ENODATA) + pr_devel("PKCS#7 message does not contain data\n"); + goto error; + } + + ret = view_content(ctx, data, len, asn1hdrlen); } error: @@ -152,6 +177,6 @@ error: pr_devel("<==%s() = %d\n", __func__, ret); return ret; } -EXPORT_SYMBOL_GPL(system_verify_data); +EXPORT_SYMBOL_GPL(verify_pkcs7_signature); #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 91a7e047a765..f7d2ef9789d8 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -40,8 +40,7 @@ config PKCS7_MESSAGE_PARSER config PKCS7_TEST_KEY tristate "PKCS#7 testing key type" - depends on PKCS7_MESSAGE_PARSER - select SYSTEM_TRUSTED_KEYRING + depends on SYSTEM_DATA_VERIFICATION help This option provides a type of key that can be loaded up from a PKCS#7 message - provided the message is signed by a trusted key. If @@ -54,6 +53,7 @@ config PKCS7_TEST_KEY config SIGNED_PE_FILE_VERIFICATION bool "Support for PE file signature verification" depends on PKCS7_MESSAGE_PARSER=y + depends on SYSTEM_DATA_VERIFICATION select ASN1 select OID_REGISTRY help diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c index 3242cbfaeaa2..6a76d5c70ef6 100644 --- a/crypto/asymmetric_keys/mscode_parser.c +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -21,19 +21,13 @@ /* * Parse a Microsoft Individual Code Signing blob */ -int mscode_parse(struct pefile_context *ctx) +int mscode_parse(void *_ctx, const void *content_data, size_t data_len, + size_t asn1hdrlen) { - const void *content_data; - size_t data_len; - int ret; - - ret = pkcs7_get_content_data(ctx->pkcs7, &content_data, &data_len, 1); - - if (ret) { - pr_debug("PKCS#7 message does not contain data\n"); - return ret; - } + struct pefile_context *ctx = _ctx; + content_data -= asn1hdrlen; + data_len += asn1hdrlen; pr_devel("Data: %zu [%*ph]\n", data_len, (unsigned)(data_len), content_data); @@ -129,7 +123,6 @@ int mscode_note_digest(void *context, size_t hdrlen, { struct pefile_context *ctx = context; - ctx->digest = value; - ctx->digest_len = vlen; - return 0; + ctx->digest = kmemdup(value, vlen, GFP_KERNEL); + return ctx->digest ? 0 : -ENOMEM; } diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c index e2d0edbbc71a..ab9bf5363ecd 100644 --- a/crypto/asymmetric_keys/pkcs7_key_type.c +++ b/crypto/asymmetric_keys/pkcs7_key_type.c @@ -13,12 +13,9 @@ #include #include #include +#include #include -#include -#include #include -#include -#include "pkcs7_parser.h" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("PKCS#7 testing key type"); @@ -29,59 +26,46 @@ MODULE_PARM_DESC(pkcs7_usage, "Usage to specify when verifying the PKCS#7 message"); /* - * Preparse a PKCS#7 wrapped and validated data blob. + * Retrieve the PKCS#7 message content. */ -static int pkcs7_preparse(struct key_preparsed_payload *prep) +static int pkcs7_view_content(void *ctx, const void *data, size_t len, + size_t asn1hdrlen) { - enum key_being_used_for usage = pkcs7_usage; - struct pkcs7_message *pkcs7; - const void *data, *saved_prep_data; - size_t datalen, saved_prep_datalen; - bool trusted; + struct key_preparsed_payload *prep = ctx; + const void *saved_prep_data; + size_t saved_prep_datalen; int ret; - kenter(""); - - if (usage >= NR__KEY_BEING_USED_FOR) { - pr_err("Invalid usage type %d\n", usage); - return -EINVAL; - } - saved_prep_data = prep->data; saved_prep_datalen = prep->datalen; - pkcs7 = pkcs7_parse_message(saved_prep_data, saved_prep_datalen); - if (IS_ERR(pkcs7)) { - ret = PTR_ERR(pkcs7); - goto error; - } - - ret = pkcs7_verify(pkcs7, usage); - if (ret < 0) - goto error_free; - - ret = pkcs7_validate_trust(pkcs7, system_trusted_keyring, &trusted); - if (ret < 0) - goto error_free; - if (!trusted) - pr_warn("PKCS#7 message doesn't chain back to a trusted key\n"); - - ret = pkcs7_get_content_data(pkcs7, &data, &datalen, false); - if (ret < 0) - goto error_free; - prep->data = data; - prep->datalen = datalen; + prep->datalen = len; + ret = user_preparse(prep); + prep->data = saved_prep_data; prep->datalen = saved_prep_datalen; - -error_free: - pkcs7_free_message(pkcs7); -error: - kleave(" = %d", ret); return ret; } +/* + * Preparse a PKCS#7 wrapped and validated data blob. + */ +static int pkcs7_preparse(struct key_preparsed_payload *prep) +{ + enum key_being_used_for usage = pkcs7_usage; + + if (usage >= NR__KEY_BEING_USED_FOR) { + pr_err("Invalid usage type %d\n", usage); + return -EINVAL; + } + + return verify_pkcs7_signature(NULL, 0, + prep->data, prep->datalen, + NULL, -ENOKEY, usage, + pkcs7_view_content, prep); +} + /* * user defined keys take an arbitrary string as the description and an * arbitrary blob of data as the payload diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 835701613125..af4cd8649117 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -168,24 +168,25 @@ EXPORT_SYMBOL_GPL(pkcs7_parse_message); * @pkcs7: The preparsed PKCS#7 message to access * @_data: Place to return a pointer to the data * @_data_len: Place to return the data length - * @want_wrapper: True if the ASN.1 object header should be included in the data + * @_headerlen: Size of ASN.1 header not included in _data * - * Get access to the data content of the PKCS#7 message, including, optionally, - * the header of the ASN.1 object that contains it. Returns -ENODATA if the - * data object was missing from the message. + * Get access to the data content of the PKCS#7 message. The size of the + * header of the ASN.1 object that contains it is also provided and can be used + * to adjust *_data and *_data_len to get the entire object. + * + * Returns -ENODATA if the data object was missing from the message. */ int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, const void **_data, size_t *_data_len, - bool want_wrapper) + size_t *_headerlen) { - size_t wrapper; - if (!pkcs7->data) return -ENODATA; - wrapper = want_wrapper ? pkcs7->data_hdrlen : 0; - *_data = pkcs7->data - wrapper; - *_data_len = pkcs7->data_len + wrapper; + *_data = pkcs7->data; + *_data_len = pkcs7->data_len; + if (_headerlen) + *_headerlen = pkcs7->data_hdrlen; return 0; } EXPORT_SYMBOL_GPL(pkcs7_get_content_data); diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index 7e8c2338ae25..265351075b0e 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "verify_pefile.h" @@ -392,9 +392,8 @@ error_no_desc: * verify_pefile_signature - Verify the signature on a PE binary image * @pebuf: Buffer containing the PE binary image * @pelen: Length of the binary image - * @trust_keyring: Signing certificates to use as starting points + * @trust_keys: Signing certificate(s) to use as starting points * @usage: The use to which the key is being put. - * @_trusted: Set to true if trustworth, false otherwise * * Validate that the certificate chain inside the PKCS#7 message inside the PE * binary image intersects keys we already know and trust. @@ -418,14 +417,10 @@ error_no_desc: * May also return -ENOMEM. */ int verify_pefile_signature(const void *pebuf, unsigned pelen, - struct key *trusted_keyring, - enum key_being_used_for usage, - bool *_trusted) + struct key *trusted_keys, + enum key_being_used_for usage) { - struct pkcs7_message *pkcs7; struct pefile_context ctx; - const void *data; - size_t datalen; int ret; kenter(""); @@ -439,19 +434,10 @@ int verify_pefile_signature(const void *pebuf, unsigned pelen, if (ret < 0) return ret; - pkcs7 = pkcs7_parse_message(pebuf + ctx.sig_offset, ctx.sig_len); - if (IS_ERR(pkcs7)) - return PTR_ERR(pkcs7); - ctx.pkcs7 = pkcs7; - - ret = pkcs7_get_content_data(ctx.pkcs7, &data, &datalen, false); - if (ret < 0 || datalen == 0) { - pr_devel("PKCS#7 message does not contain data\n"); - ret = -EBADMSG; - goto error; - } - - ret = mscode_parse(&ctx); + ret = verify_pkcs7_signature(NULL, 0, + pebuf + ctx.sig_offset, ctx.sig_len, + trusted_keys, -EKEYREJECTED, usage, + mscode_parse, &ctx); if (ret < 0) goto error; @@ -462,16 +448,8 @@ int verify_pefile_signature(const void *pebuf, unsigned pelen, * contents. */ ret = pefile_digest_pe(pebuf, pelen, &ctx); - if (ret < 0) - goto error; - - ret = pkcs7_verify(pkcs7, usage); - if (ret < 0) - goto error; - - ret = pkcs7_validate_trust(pkcs7, trusted_keyring, _trusted); error: - pkcs7_free_message(ctx.pkcs7); + kfree(ctx.digest); return ret; } diff --git a/crypto/asymmetric_keys/verify_pefile.h b/crypto/asymmetric_keys/verify_pefile.h index a133eb81a492..cd4d20930728 100644 --- a/crypto/asymmetric_keys/verify_pefile.h +++ b/crypto/asymmetric_keys/verify_pefile.h @@ -9,7 +9,6 @@ * 2 of the Licence, or (at your option) any later version. */ -#include #include #include @@ -23,7 +22,6 @@ struct pefile_context { unsigned sig_offset; unsigned sig_len; const struct section_header *secs; - struct pkcs7_message *pkcs7; /* PKCS#7 MS Individual Code Signing content */ const void *digest; /* Digest */ @@ -39,4 +37,5 @@ struct pefile_context { /* * mscode_parser.c */ -extern int mscode_parse(struct pefile_context *ctx); +extern int mscode_parse(void *_ctx, const void *content_data, size_t data_len, + size_t asn1hdrlen); diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h index 441aff9b5aa7..8323e3e57131 100644 --- a/include/crypto/pkcs7.h +++ b/include/crypto/pkcs7.h @@ -12,6 +12,7 @@ #ifndef _CRYPTO_PKCS7_H #define _CRYPTO_PKCS7_H +#include #include struct key; @@ -26,7 +27,7 @@ extern void pkcs7_free_message(struct pkcs7_message *pkcs7); extern int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, const void **_data, size_t *_datalen, - bool want_wrapper); + size_t *_headerlen); /* * pkcs7_trust.c diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index 2f5de5c1a3a0..b3928e801b8c 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -14,20 +14,6 @@ #ifndef _LINUX_PUBLIC_KEY_H #define _LINUX_PUBLIC_KEY_H -/* - * The use to which an asymmetric key is being put. - */ -enum key_being_used_for { - VERIFYING_MODULE_SIGNATURE, - VERIFYING_FIRMWARE_SIGNATURE, - VERIFYING_KEXEC_PE_SIGNATURE, - VERIFYING_KEY_SIGNATURE, - VERIFYING_KEY_SELF_SIGNATURE, - VERIFYING_UNSPECIFIED_SIGNATURE, - NR__KEY_BEING_USED_FOR -}; -extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; - /* * Cryptographic data for the public-key subtype of the asymmetric key type. * diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index 70a8775bb444..d1e23dda4363 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -15,6 +15,7 @@ #define _KEYS_ASYMMETRIC_TYPE_H #include +#include extern struct key_type key_type_asymmetric; diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 39fd38cfa8c9..b2d645ac35a0 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -15,6 +15,7 @@ #ifdef CONFIG_SYSTEM_TRUSTED_KEYRING #include +#include #include extern struct key *system_trusted_keyring; @@ -29,12 +30,6 @@ static inline struct key *get_system_trusted_keyring(void) } #endif -#ifdef CONFIG_SYSTEM_DATA_VERIFICATION -extern int system_verify_data(const void *data, unsigned long len, - const void *raw_pkcs7, size_t pkcs7_len, - enum key_being_used_for usage); -#endif - #ifdef CONFIG_IMA_MOK_KEYRING extern struct key *ima_mok_keyring; extern struct key *ima_blacklist_keyring; diff --git a/include/linux/verification.h b/include/linux/verification.h new file mode 100644 index 000000000000..bb0fcf941cb7 --- /dev/null +++ b/include/linux/verification.h @@ -0,0 +1,50 @@ +/* Signature verification + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_VERIFICATION_H +#define _LINUX_VERIFICATION_H + +/* + * The use to which an asymmetric key is being put. + */ +enum key_being_used_for { + VERIFYING_MODULE_SIGNATURE, + VERIFYING_FIRMWARE_SIGNATURE, + VERIFYING_KEXEC_PE_SIGNATURE, + VERIFYING_KEY_SIGNATURE, + VERIFYING_KEY_SELF_SIGNATURE, + VERIFYING_UNSPECIFIED_SIGNATURE, + NR__KEY_BEING_USED_FOR +}; +extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; + +#ifdef CONFIG_SYSTEM_DATA_VERIFICATION + +struct key; + +extern int verify_pkcs7_signature(const void *data, size_t len, + const void *raw_pkcs7, size_t pkcs7_len, + struct key *trusted_keys, + int untrusted_error, + enum key_being_used_for usage, + int (*view_content)(void *ctx, + const void *data, size_t len, + size_t asn1hdrlen), + void *ctx); + +#ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION +extern int verify_pefile_signature(const void *pebuf, unsigned pelen, + struct key *trusted_keys, + enum key_being_used_for usage); +#endif + +#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ +#endif /* _LINUX_VERIFY_PEFILE_H */ diff --git a/include/linux/verify_pefile.h b/include/linux/verify_pefile.h deleted file mode 100644 index da2049b5161c..000000000000 --- a/include/linux/verify_pefile.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Signed PE file verification - * - * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _LINUX_VERIFY_PEFILE_H -#define _LINUX_VERIFY_PEFILE_H - -#include - -extern int verify_pefile_signature(const void *pebuf, unsigned pelen, - struct key *trusted_keyring, - enum key_being_used_for usage, - bool *_trusted); - -#endif /* _LINUX_VERIFY_PEFILE_H */ diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 64b9dead4a07..593aace88a02 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -80,6 +80,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen) return -EBADMSG; } - return system_verify_data(mod, modlen, mod + modlen, sig_len, - VERIFYING_MODULE_SIGNATURE); + return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + NULL, -ENOKEY, VERIFYING_MODULE_SIGNATURE, + NULL, NULL); } -- cgit v1.2.3 From bda850cd214e90b1be0cc25bc48c4f6ac53eb543 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:24 +0100 Subject: PKCS#7: Make trust determination dependent on contents of trust keyring Make the determination of the trustworthiness of a key dependent on whether a key that can verify it is present in the supplied ring of trusted keys rather than whether or not the verifying key has KEY_FLAG_TRUSTED set. verify_pkcs7_signature() will return -ENOKEY if the PKCS#7 message trust chain cannot be verified. Signed-off-by: David Howells --- certs/system_keyring.c | 13 ++++--------- crypto/asymmetric_keys/pkcs7_key_type.c | 2 +- crypto/asymmetric_keys/pkcs7_parser.h | 1 - crypto/asymmetric_keys/pkcs7_trust.c | 18 +++--------------- crypto/asymmetric_keys/verify_pefile.c | 2 +- crypto/asymmetric_keys/x509_parser.h | 1 - include/crypto/pkcs7.h | 3 +-- include/linux/verification.h | 1 - kernel/module_signing.c | 2 +- 9 files changed, 11 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/certs/system_keyring.c b/certs/system_keyring.c index a83bffedc0aa..dc18869ff680 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -121,7 +121,6 @@ late_initcall(load_system_certificate_list); int verify_pkcs7_signature(const void *data, size_t len, const void *raw_pkcs7, size_t pkcs7_len, struct key *trusted_keys, - int untrusted_error, enum key_being_used_for usage, int (*view_content)(void *ctx, const void *data, size_t len, @@ -129,7 +128,6 @@ int verify_pkcs7_signature(const void *data, size_t len, void *ctx) { struct pkcs7_message *pkcs7; - bool trusted; int ret; pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len); @@ -149,13 +147,10 @@ int verify_pkcs7_signature(const void *data, size_t len, if (!trusted_keys) trusted_keys = system_trusted_keyring; - ret = pkcs7_validate_trust(pkcs7, trusted_keys, &trusted); - if (ret < 0) - goto error; - - if (!trusted && untrusted_error) { - pr_err("PKCS#7 signature not signed with a trusted key\n"); - ret = untrusted_error; + ret = pkcs7_validate_trust(pkcs7, trusted_keys); + if (ret < 0) { + if (ret == -ENOKEY) + pr_err("PKCS#7 signature not signed with a trusted key\n"); goto error; } diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c index ab9bf5363ecd..3b92523882e5 100644 --- a/crypto/asymmetric_keys/pkcs7_key_type.c +++ b/crypto/asymmetric_keys/pkcs7_key_type.c @@ -62,7 +62,7 @@ static int pkcs7_preparse(struct key_preparsed_payload *prep) return verify_pkcs7_signature(NULL, 0, prep->data, prep->datalen, - NULL, -ENOKEY, usage, + NULL, usage, pkcs7_view_content, prep); } diff --git a/crypto/asymmetric_keys/pkcs7_parser.h b/crypto/asymmetric_keys/pkcs7_parser.h index d5eec31e95b6..f4e81074f5e0 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.h +++ b/crypto/asymmetric_keys/pkcs7_parser.h @@ -22,7 +22,6 @@ struct pkcs7_signed_info { struct pkcs7_signed_info *next; struct x509_certificate *signer; /* Signing certificate (in msg->certs) */ unsigned index; - bool trusted; bool unsupported_crypto; /* T if not usable due to missing crypto */ /* Message digest - the digest of the Content Data (or NULL) */ diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c index b9a5487cd82d..36e77cb07bd0 100644 --- a/crypto/asymmetric_keys/pkcs7_trust.c +++ b/crypto/asymmetric_keys/pkcs7_trust.c @@ -30,7 +30,6 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7, struct public_key_signature *sig = sinfo->sig; struct x509_certificate *x509, *last = NULL, *p; struct key *key; - bool trusted; int ret; kenter(",%u,", sinfo->index); @@ -42,10 +41,8 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7, for (x509 = sinfo->signer; x509; x509 = x509->signer) { if (x509->seen) { - if (x509->verified) { - trusted = x509->trusted; + if (x509->verified) goto verified; - } kleave(" = -ENOKEY [cached]"); return -ENOKEY; } @@ -122,7 +119,6 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7, matched: ret = verify_signature(key, sig); - trusted = test_bit(KEY_FLAG_TRUSTED, &key->flags); key_put(key); if (ret < 0) { if (ret == -ENOMEM) @@ -134,12 +130,9 @@ matched: verified: if (x509) { x509->verified = true; - for (p = sinfo->signer; p != x509; p = p->signer) { + for (p = sinfo->signer; p != x509; p = p->signer) p->verified = true; - p->trusted = trusted; - } } - sinfo->trusted = trusted; kleave(" = 0"); return 0; } @@ -148,7 +141,6 @@ verified: * pkcs7_validate_trust - Validate PKCS#7 trust chain * @pkcs7: The PKCS#7 certificate to validate * @trust_keyring: Signing certificates to use as starting points - * @_trusted: Set to true if trustworth, false otherwise * * Validate that the certificate chain inside the PKCS#7 message intersects * keys we already know and trust. @@ -170,16 +162,13 @@ verified: * May also return -ENOMEM. */ int pkcs7_validate_trust(struct pkcs7_message *pkcs7, - struct key *trust_keyring, - bool *_trusted) + struct key *trust_keyring) { struct pkcs7_signed_info *sinfo; struct x509_certificate *p; int cached_ret = -ENOKEY; int ret; - *_trusted = false; - for (p = pkcs7->certs; p; p = p->next) p->seen = false; @@ -193,7 +182,6 @@ int pkcs7_validate_trust(struct pkcs7_message *pkcs7, cached_ret = -ENOPKG; continue; case 0: - *_trusted |= sinfo->trusted; cached_ret = 0; continue; default: diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index 265351075b0e..672a94c2c3ff 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -436,7 +436,7 @@ int verify_pefile_signature(const void *pebuf, unsigned pelen, ret = verify_pkcs7_signature(NULL, 0, pebuf + ctx.sig_offset, ctx.sig_len, - trusted_keys, -EKEYREJECTED, usage, + trusted_keys, usage, mscode_parse, &ctx); if (ret < 0) goto error; diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index f24f4d808e7f..05eef1c68881 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -39,7 +39,6 @@ struct x509_certificate { unsigned index; bool seen; /* Infinite recursion prevention */ bool verified; - bool trusted; bool self_signed; /* T if self-signed (check unsupported_sig too) */ bool unsupported_key; /* T if key uses unsupported crypto */ bool unsupported_sig; /* T if signature uses unsupported crypto */ diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h index 8323e3e57131..583f199400a3 100644 --- a/include/crypto/pkcs7.h +++ b/include/crypto/pkcs7.h @@ -33,8 +33,7 @@ extern int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, * pkcs7_trust.c */ extern int pkcs7_validate_trust(struct pkcs7_message *pkcs7, - struct key *trust_keyring, - bool *_trusted); + struct key *trust_keyring); /* * pkcs7_verify.c diff --git a/include/linux/verification.h b/include/linux/verification.h index bb0fcf941cb7..a10549a6c7cd 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -33,7 +33,6 @@ struct key; extern int verify_pkcs7_signature(const void *data, size_t len, const void *raw_pkcs7, size_t pkcs7_len, struct key *trusted_keys, - int untrusted_error, enum key_being_used_for usage, int (*view_content)(void *ctx, const void *data, size_t len, diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 593aace88a02..6a64e03b9f44 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -81,6 +81,6 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen) } return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, - NULL, -ENOKEY, VERIFYING_MODULE_SIGNATURE, + NULL, VERIFYING_MODULE_SIGNATURE, NULL, NULL); } -- cgit v1.2.3 From 31e6850e0fdb3a586363cc4d2f9801cdf9374310 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 5 Apr 2016 12:39:30 +0100 Subject: iommu: Add MMIO mapping type On some platforms, MMIO regions might need slightly different treatment compared to mapping regular memory; add the notion of MMIO mappings to the IOMMU API's memory type flags, so that callers can let the IOMMU drivers know to do the right thing. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a5c539fa5d2b..34b643227df1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -30,6 +30,7 @@ #define IOMMU_WRITE (1 << 1) #define IOMMU_CACHE (1 << 2) /* DMA cache coherency */ #define IOMMU_NOEXEC (1 << 3) +#define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ struct iommu_ops; struct iommu_group; -- cgit v1.2.3 From 848fc67da5fb43ef7d92d20891eef79f5de45816 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Mar 2016 15:32:40 +0100 Subject: clk: renesas: mstp: Drop check for CONFIG_PM_GENERIC_DOMAINS_OF As of commit 71d076ceb245f0d9 ("ARM: shmobile: Enable PM and PM_GENERIC_DOMAINS for SoCs with PM Domains"), CONFIG_PM_GENERIC_DOMAINS_OF is always enabled for SoCs with MSTP clocks. Signed-off-by: Geert Uytterhoeven Acked-by: Laurent Pinchart --- drivers/clk/renesas/clk-mstp.c | 3 --- include/linux/clk/renesas.h | 4 ---- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 3d44e183aedd..5e3b3d856eef 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -243,8 +243,6 @@ static void __init cpg_mstp_clocks_init(struct device_node *np) } CLK_OF_DECLARE(cpg_mstp_clks, "renesas,cpg-mstp-clocks", cpg_mstp_clocks_init); - -#ifdef CONFIG_PM_GENERIC_DOMAINS_OF int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev) { struct device_node *np = dev->of_node; @@ -326,4 +324,3 @@ void __init cpg_mstp_add_clk_domain(struct device_node *np) of_genpd_add_provider_simple(np, pd); } -#endif /* !CONFIG_PM_GENERIC_DOMAINS_OF */ diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 7adfd80fbf55..2a3379cf1330 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -24,12 +24,8 @@ void r8a7778_clocks_init(u32 mode); void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); -#ifdef CONFIG_PM_GENERIC_DOMAINS_OF void cpg_mstp_add_clk_domain(struct device_node *np); int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev); void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev); -#else -static inline void cpg_mstp_add_clk_domain(struct device_node *np) {} -#endif #endif -- cgit v1.2.3 From 12a56817b329d8a73ab53bad09aa976aeea46db9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Mar 2016 16:59:26 +0100 Subject: clk: renesas: mstp: Clarify cpg_mstp_{at,de}tach_dev() domain parameter Make it clear that the "domain" parameter of the cpg_mstp_attach_dev() and cpg_mstp_detach_dev() functions is not used. The cpg_mstp_attach_dev() and cpg_mstp_detach_dev() callbacks are not only used by the CPG/MSTP Clock Domain driver, but also by the R-Mobile SYSC PM Domain driver. Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/clk-mstp.c | 4 ++-- include/linux/clk/renesas.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 5e3b3d856eef..8b597b9a3804 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -243,7 +243,7 @@ static void __init cpg_mstp_clocks_init(struct device_node *np) } CLK_OF_DECLARE(cpg_mstp_clks, "renesas,cpg-mstp-clocks", cpg_mstp_clocks_init); -int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev) +int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev) { struct device_node *np = dev->of_node; struct of_phandle_args clkspec; @@ -295,7 +295,7 @@ fail_put: return error; } -void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev) +void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev) { if (!list_empty(&dev->power.subsys_data->clock_list)) pm_clk_destroy(dev); diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 2a3379cf1330..095b1681daf4 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -25,7 +25,7 @@ void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); void cpg_mstp_add_clk_domain(struct device_node *np); -int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev); -void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev); +int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev); +void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev); #endif -- cgit v1.2.3 From 8ced425ee630c03beea06c1dfa35190bf8395d07 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 5 Apr 2016 17:10:16 +0200 Subject: tun: use socket locks for sk_{attach,detatch}_filter This reverts commit 5a5abb1fa3b05dd ("tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter") and replaces it to use lock_sock around sk_{attach,detach}_filter. The checks inside filter.c are updated with lockdep_sock_is_held to check for proper socket locks. It keeps the code cleaner by ensuring that only one lock governs the socket filter instead of two independent locks. Cc: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/tun.c | 14 +++++++++----- include/linux/filter.h | 4 ---- net/core/filter.c | 35 +++++++++++++---------------------- 3 files changed, 22 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9abc36bf77ea..64bc143eddd9 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -622,8 +622,9 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { - err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, - lockdep_rtnl_is_held()); + lock_sock(tfile->socket.sk); + err = sk_attach_filter(&tun->fprog, tfile->socket.sk); + release_sock(tfile->socket.sk); if (!err) goto out; } @@ -1824,7 +1825,9 @@ static void tun_detach_filter(struct tun_struct *tun, int n) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); + lock_sock(tfile->socket.sk); + sk_detach_filter(tfile->socket.sk); + release_sock(tfile->socket.sk); } tun->filter_attached = false; @@ -1837,8 +1840,9 @@ static int tun_attach_filter(struct tun_struct *tun) for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, - lockdep_rtnl_is_held()); + lock_sock(tfile->socket.sk); + ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); + release_sock(tfile->socket.sk); if (ret) { tun_detach_filter(tun, i); return ret; diff --git a/include/linux/filter.h b/include/linux/filter.h index a51a5361695f..43aa1f8855c7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,14 +465,10 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); -int __sk_detach_filter(struct sock *sk, bool locked); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index ca7f832b2980..e8486ba601ea 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1149,8 +1149,7 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, - bool locked) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) { struct sk_filter *fp, *old_fp; @@ -1166,8 +1165,10 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, locked); + old_fp = rcu_dereference_protected(sk->sk_filter, + lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_filter, fp); + if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1246,8 +1247,7 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked) +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; @@ -1255,7 +1255,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk, locked); + err = __sk_attach_prog(prog, sk); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1263,12 +1263,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, return 0; } -EXPORT_SYMBOL_GPL(__sk_attach_filter); - -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) -{ - return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); -} +EXPORT_SYMBOL_GPL(sk_attach_filter); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { @@ -1314,7 +1309,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); return err; @@ -2255,7 +2250,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int __sk_detach_filter(struct sock *sk, bool locked) +int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; struct sk_filter *filter; @@ -2263,7 +2258,8 @@ int __sk_detach_filter(struct sock *sk, bool locked) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, locked); + filter = rcu_dereference_protected(sk->sk_filter, + lockdep_sock_is_held(sk)); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -2272,12 +2268,7 @@ int __sk_detach_filter(struct sock *sk, bool locked) return ret; } -EXPORT_SYMBOL_GPL(__sk_detach_filter); - -int sk_detach_filter(struct sock *sk) -{ - return __sk_detach_filter(sk, sock_owned_by_user(sk)); -} +EXPORT_SYMBOL_GPL(sk_detach_filter); int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) @@ -2288,7 +2279,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (!filter) goto out; -- cgit v1.2.3 From a6024562ffd7e0f31bc6671817840ad1e91de7b4 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 5 Apr 2016 08:22:51 -0700 Subject: udp: Add GRO functions to UDP socket This patch adds GRO functions (gro_receive and gro_complete) to UDP sockets. udp_gro_receive is changed to perform socket lookup on a packet. If a socket is found the related GRO functions are called. This features obsoletes using UDP offload infrastructure for GRO (udp_offload). This has the advantage of not being limited to provide offload on a per port basis, GRO is now applied to whatever individual UDP sockets are bound to. This also allows the possbility of "application defined GRO"-- that is we can attach something like a BPF program to a UDP socket to perfrom GRO on an application layer protocol. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 8 ++++++++ include/net/udp.h | 7 +++++-- net/ipv4/udp_offload.c | 52 +++++++++++++++++++------------------------------- net/ipv6/Makefile | 5 +++-- net/ipv6/af_inet6.c | 8 ++++++++ net/ipv6/ip6_offload.c | 2 -- net/ipv6/ip6_offload.h | 3 ++- net/ipv6/udp_offload.c | 11 ++++++++--- 8 files changed, 54 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 32342754643a..d1fd8cd39478 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -71,6 +71,14 @@ struct udp_sock { */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); + + /* GRO functions for UDP socket */ + struct sk_buff ** (*gro_receive)(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sock *sk, + struct sk_buff *skb, + int nhoff); }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/include/net/udp.h b/include/net/udp.h index 3aa0b3ec1fb0..3c5a65e0946d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,9 +167,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } +typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, + __be16 dport); + struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh); -int udp_gro_complete(struct sk_buff *skb, int nhoff); + struct udphdr *uh, udp_lookup_t lookup); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0ed2dafb7cc4..65c3fd34b363 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -179,6 +179,7 @@ out_unlock: return segs; } +EXPORT_SYMBOL(skb_udp_tunnel_segment); static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) @@ -304,13 +305,13 @@ unlock: EXPORT_SYMBOL(udp_del_offload); struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh) + struct udphdr *uh, udp_lookup_t lookup) { - struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; struct udphdr *uh2; unsigned int off = skb_gro_offset(skb); int flush = 1; + struct sock *sk; if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && @@ -322,13 +323,11 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); - uo_priv = rcu_dereference(udp_offload_base); - for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && - uo_priv->offload->port == uh->dest && - uo_priv->offload->callbacks.gro_receive) - goto unflush; - } + sk = (*lookup)(skb, uh->source, uh->dest); + + if (sk && udp_sk(sk)->gro_receive) + goto unflush; + goto out_unlock; unflush: @@ -352,9 +351,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb, - uo_priv->offload); + pp = udp_sk(sk)->gro_receive(sk, head, skb); out_unlock: rcu_read_unlock(); @@ -362,6 +359,7 @@ out: NAPI_GRO_CB(skb)->flush |= flush; return pp; } +EXPORT_SYMBOL(udp_gro_receive); static struct sk_buff **udp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -383,39 +381,28 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, inet_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; - return udp_gro_receive(head, skb, uh); + return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; } -int udp_gro_complete(struct sk_buff *skb, int nhoff) +int udp_gro_complete(struct sk_buff *skb, int nhoff, + udp_lookup_t lookup) { - struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); int err = -ENOSYS; + struct sock *sk; uh->len = newlen; rcu_read_lock(); - - uo_priv = rcu_dereference(udp_offload_base); - for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && - uo_priv->offload->port == uh->dest && - uo_priv->offload->callbacks.gro_complete) - break; - } - - if (uo_priv) { - NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, - nhoff + sizeof(struct udphdr), - uo_priv->offload); - } - + sk = (*lookup)(skb, uh->source, uh->dest); + if (sk && udp_sk(sk)->gro_complete) + err = udp_sk(sk)->gro_complete(sk, skb, + nhoff + sizeof(struct udphdr)); rcu_read_unlock(); if (skb->remcsum_offload) @@ -426,6 +413,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +EXPORT_SYMBOL(udp_gro_complete); static int udp4_gro_complete(struct sk_buff *skb, int nhoff) { @@ -440,7 +428,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } - return udp_gro_complete(skb, nhoff); + return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } static const struct net_offload udpv4_offload = { diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2fbd90bf8d33..5e9d6bf4aaca 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,9 +8,10 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ - exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o + exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ + udp_offload.o -ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o +ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2b78aad0d52f..bfa86f040c16 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -64,6 +64,8 @@ #include #include +#include "ip6_offload.h" + MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); @@ -959,6 +961,10 @@ static int __init inet6_init(void) if (err) goto udplitev6_fail; + err = udpv6_offload_init(); + if (err) + goto udpv6_offload_fail; + err = tcpv6_init(); if (err) goto tcpv6_fail; @@ -988,6 +994,8 @@ pingv6_fail: ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: + udpv6_offload_exit(); +udpv6_offload_fail: udplitev6_exit(); udplitev6_fail: udpv6_exit(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 82e9f3076028..204af2219471 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -325,8 +325,6 @@ static int __init ipv6_offload_init(void) if (tcpv6_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); - if (udp_offload_init() < 0) - pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (ipv6_exthdrs_offload_init() < 0) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index 2e155c651b35..96b40e41ac53 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -12,7 +12,8 @@ #define __ip6_offload_h int ipv6_exthdrs_offload_init(void); -int udp_offload_init(void); +int udpv6_offload_init(void); +int udpv6_offload_exit(void); int tcpv6_offload_init(void); #endif diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 2b0fbe6929e8..5429f6bcf047 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -153,7 +153,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; - return udp_gro_receive(head, skb, uh); + return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; @@ -173,7 +173,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } - return udp_gro_complete(skb, nhoff); + return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } static const struct net_offload udpv6_offload = { @@ -184,7 +184,12 @@ static const struct net_offload udpv6_offload = { }, }; -int __init udp_offload_init(void) +int udpv6_offload_init(void) { return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); } + +int udpv6_offload_exit(void) +{ + return inet6_del_offload(&udpv6_offload, IPPROTO_UDP); +} -- cgit v1.2.3 From 46aa2f30aa7fe03a4dcd732b009284c02ff4f093 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 5 Apr 2016 08:22:56 -0700 Subject: udp: Remove udp_offloads Now that the UDP encapsulation GRO functions have been moved to the UDP socket we not longer need the udp_offload insfrastructure so removing it. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ------------- include/net/protocol.h | 3 --- net/ipv4/udp_offload.c | 63 ----------------------------------------------- 3 files changed, 83 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..cb4e508b3f38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2159,23 +2159,6 @@ struct packet_offload { struct list_head list; }; -struct udp_offload; - -struct udp_offload_callbacks { - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff); - int (*gro_complete)(struct sk_buff *skb, - int nhoff, - struct udp_offload *uoff); -}; - -struct udp_offload { - __be16 port; - u8 ipproto; - struct udp_offload_callbacks callbacks; -}; - /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/include/net/protocol.h b/include/net/protocol.h index da689f5432de..bf36ca34af7a 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -107,9 +107,6 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num); void inet_register_protosw(struct inet_protosw *p); void inet_unregister_protosw(struct inet_protosw *p); -int udp_add_offload(struct net *net, struct udp_offload *prot); -void udp_del_offload(struct udp_offload *prot); - #if IS_ENABLED(CONFIG_IPV6) int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 65c3fd34b363..6230cf4b0d2d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -14,18 +14,6 @@ #include #include -static DEFINE_SPINLOCK(udp_offload_lock); -static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; - -#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) - -struct udp_offload_priv { - struct udp_offload *offload; - possible_net_t net; - struct rcu_head rcu; - struct udp_offload_priv __rcu *next; -}; - static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, @@ -254,56 +242,6 @@ out: return segs; } -int udp_add_offload(struct net *net, struct udp_offload *uo) -{ - struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); - - if (!new_offload) - return -ENOMEM; - - write_pnet(&new_offload->net, net); - new_offload->offload = uo; - - spin_lock(&udp_offload_lock); - new_offload->next = udp_offload_base; - rcu_assign_pointer(udp_offload_base, new_offload); - spin_unlock(&udp_offload_lock); - - return 0; -} -EXPORT_SYMBOL(udp_add_offload); - -static void udp_offload_free_routine(struct rcu_head *head) -{ - struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); - kfree(ou_priv); -} - -void udp_del_offload(struct udp_offload *uo) -{ - struct udp_offload_priv __rcu **head = &udp_offload_base; - struct udp_offload_priv *uo_priv; - - spin_lock(&udp_offload_lock); - - uo_priv = udp_deref_protected(*head); - for (; uo_priv != NULL; - uo_priv = udp_deref_protected(*head)) { - if (uo_priv->offload == uo) { - rcu_assign_pointer(*head, - udp_deref_protected(uo_priv->next)); - goto unlock; - } - head = &uo_priv->next; - } - pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); -unlock: - spin_unlock(&udp_offload_lock); - if (uo_priv) - call_rcu(&uo_priv->rcu, udp_offload_free_routine); -} -EXPORT_SYMBOL(udp_del_offload); - struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup) { @@ -327,7 +265,6 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, if (sk && udp_sk(sk)->gro_receive) goto unflush; - goto out_unlock; unflush: -- cgit v1.2.3 From ec5e099d6e941668d121ea9ca7057f4fa00830b0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:22 -0700 Subject: perf: optimize perf_fetch_caller_regs avoid memset in perf_fetch_caller_regs, since it's the critical path of all tracepoints. It's called from perf_sw_event_sched, perf_event_task_sched_in and all of perf_trace_##call with this_cpu_ptr(&__perf_regs[..]) which are zero initialized by perpcu init logic and subsequent call to perf_arch_fetch_caller_regs initializes the same fields on all archs, so we can safely drop memset from all of the above cases and move it into perf_ftrace_function_call that calls it with stack allocated pt_regs. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/perf_event.h | 2 -- kernel/trace/trace_event_perf.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f291275ffd71..e89f7199c223 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -882,8 +882,6 @@ static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned lo */ static inline void perf_fetch_caller_regs(struct pt_regs *regs) { - memset(regs, 0, sizeof(*regs)); - perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 00df25fd86ef..7a68afca8249 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -316,6 +316,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); + memset(®s, 0, sizeof(regs)); perf_fetch_caller_regs(®s); entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); -- cgit v1.2.3 From 1e1dcd93b468901e114f279c94a0b356adc5e7cd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:24 -0700 Subject: perf: split perf_trace_buf_prepare into alloc and update parts split allows to move expensive update of 'struct trace_entry' to later phase. Repurpose unused 1st argument of perf_tp_event() to indicate event type. While splitting use temp variable 'rctx' instead of '*rctx' to avoid unnecessary loads done by the compiler due to -fno-strict-aliasing Signed-off-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Signed-off-by: David S. Miller --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 8 ++++---- include/trace/perf.h | 8 ++++---- kernel/events/core.c | 6 ++++-- kernel/trace/trace_event_perf.c | 39 ++++++++++++++++++++------------------- kernel/trace/trace_kprobe.c | 10 ++++++---- kernel/trace/trace_syscalls.c | 13 +++++++------ kernel/trace/trace_uprobe.c | 5 +++-- 8 files changed, 49 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e89f7199c223..eb41b535ef38 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1016,7 +1016,7 @@ static inline bool perf_paranoid_kernel(void) } extern void perf_event_init(void); -extern void perf_tp_event(u64 addr, u64 count, void *record, +extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0810f81b6db2..56f795e6a093 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -605,15 +605,15 @@ extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp); +void perf_trace_buf_update(void *record, u16 type); +void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); static inline void -perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, +perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, struct task_struct *task) { - perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif diff --git a/include/trace/perf.h b/include/trace/perf.h index 6f7e37869065..77cd9043b7e4 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -53,8 +53,7 @@ perf_trace_##call(void *__data, proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - entry = perf_trace_buf_prepare(__entry_size, \ - event_call->event.type, &__regs, &rctx); \ + entry = perf_trace_buf_alloc(__entry_size, &__regs, &rctx); \ if (!entry) \ return; \ \ @@ -64,8 +63,9 @@ perf_trace_##call(void *__data, proto) \ \ { assign; } \ \ - perf_trace_buf_submit(entry, __entry_size, rctx, 0, \ - __count, __regs, head, __task); \ + perf_trace_buf_submit(entry, __entry_size, rctx, \ + event_call->event.type, __count, __regs, \ + head, __task); \ } /* diff --git a/kernel/events/core.c b/kernel/events/core.c index de24fbce5277..d8512883c0a0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6987,7 +6987,7 @@ static int perf_tp_event_match(struct perf_event *event, return 1; } -void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, +void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task) { @@ -6999,9 +6999,11 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, .data = record, }; - perf_sample_data_init(&data, addr, 0); + perf_sample_data_init(&data, 0, 0); data.raw = &raw; + perf_trace_buf_update(record, event_type); + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 7a68afca8249..5a927075977f 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -260,42 +260,43 @@ void perf_trace_del(struct perf_event *p_event, int flags) tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } -void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp) +void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) { - struct trace_entry *entry; - unsigned long flags; char *raw_data; - int pc; + int rctx; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough")) + "perf buffer not large enough")) return NULL; - pc = preempt_count(); - - *rctxp = perf_swevent_get_recursion_context(); - if (*rctxp < 0) + *rctxp = rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) return NULL; if (regs) - *regs = this_cpu_ptr(&__perf_regs[*rctxp]); - raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); + *regs = this_cpu_ptr(&__perf_regs[rctx]); + raw_data = this_cpu_ptr(perf_trace_buf[rctx]); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); + return raw_data; +} +EXPORT_SYMBOL_GPL(perf_trace_buf_alloc); +NOKPROBE_SYMBOL(perf_trace_buf_alloc); + +void perf_trace_buf_update(void *record, u16 type) +{ + struct trace_entry *entry = record; + int pc = preempt_count(); + unsigned long flags; - entry = (struct trace_entry *)raw_data; local_save_flags(flags); tracing_generic_entry_update(entry, flags, pc); entry->type = type; - - return raw_data; } -EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); -NOKPROBE_SYMBOL(perf_trace_buf_prepare); +NOKPROBE_SYMBOL(perf_trace_buf_update); #ifdef CONFIG_FUNCTION_TRACER static void @@ -319,13 +320,13 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, memset(®s, 0, sizeof(regs)); perf_fetch_caller_regs(®s); - entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); + entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx); if (!entry) return; entry->ip = ip; entry->parent_ip = parent_ip; - perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, + perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, 1, ®s, head, NULL); #undef ENTRY_SIZE diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 919e0ddd8fcc..5546eec0505f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1149,14 +1149,15 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); + entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) return; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, + head, NULL); } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1184,14 +1185,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); + entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) return; entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, + head, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e78f364cc192..b2b6efc083a4 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -587,15 +587,16 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, - sys_data->enter_event->event.type, NULL, &rctx); + rec = perf_trace_buf_alloc(size, NULL, &rctx); if (!rec) return; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(rec, size, rctx, + sys_data->enter_event->event.type, 1, regs, + head, NULL); } static int perf_sysenter_enable(struct trace_event_call *call) @@ -660,14 +661,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, - sys_data->exit_event->event.type, NULL, &rctx); + rec = perf_trace_buf_alloc(size, NULL, &rctx); if (!rec) return; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, + 1, regs, head, NULL); } static int perf_sysexit_enable(struct trace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7915142c89e4..c53485441c88 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1131,7 +1131,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (hlist_empty(head)) goto out; - entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); + entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) goto out; @@ -1152,7 +1152,8 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, memset(data + len, 0, size - esize - len); } - perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, + head, NULL); out: preempt_enable(); } -- cgit v1.2.3 From 9940d67c93b5bb7ddcf862b41b1847cb728186c4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:27 -0700 Subject: bpf: support bpf_get_stackid() and bpf_perf_event_output() in tracepoint programs needs two wrapper functions to fetch 'struct pt_regs *' to convert tracepoint bpf context into kprobe bpf context to reuse existing helper functions Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + kernel/bpf/stackmap.c | 2 +- kernel/trace/bpf_trace.c | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 21ee41b92e8a..198f6ace70ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -160,6 +160,7 @@ struct bpf_array { #define MAX_TAIL_CALL_CNT 32 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 499d9e933f8e..35114725cf30 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -116,7 +116,7 @@ free_smap: return ERR_PTR(err); } -static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) { struct pt_regs *regs = (struct pt_regs *) (long) r1; struct bpf_map *map = (struct bpf_map *) (long) r2; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3e5ebe3254d2..413ec5614180 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -340,12 +340,52 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; +static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +{ + /* + * r1 points to perf tracepoint buffer where first 8 bytes are hidden + * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it + * from there and call the same bpf_perf_event_output() helper + */ + u64 ctx = *(long *)r1; + + return bpf_perf_event_output(ctx, r2, index, r4, size); +} + +static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { + .func = bpf_perf_event_output_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + u64 ctx = *(long *)r1; + + return bpf_get_stackid(ctx, r2, r3, r4, r5); +} + +static const struct bpf_func_proto bpf_get_stackid_proto_tp = { + .func = bpf_get_stackid_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto_tp; case BPF_FUNC_get_stackid: - return NULL; + return &bpf_get_stackid_proto_tp; default: return tracing_func_proto(func_id); } -- cgit v1.2.3 From 32bbe0078afe86a8bf4c67c6b3477781b15e94dc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:28 -0700 Subject: bpf: sanitize bpf tracepoint access during bpf program loading remember the last byte of ctx access and at the time of attaching the program to tracepoint check that the program doesn't access bytes beyond defined in tracepoint fields This also disallows access to __dynamic_array fields, but can be relaxed in the future. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/linux/trace_events.h | 1 + kernel/bpf/verifier.c | 6 +++++- kernel/events/core.c | 8 ++++++++ kernel/trace/trace_events.c | 18 ++++++++++++++++++ 5 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 198f6ace70ec..b2365a6eba3d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -131,6 +131,7 @@ struct bpf_prog_type_list { struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; + u32 max_ctx_offset; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 56f795e6a093..fe6441203b59 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -569,6 +569,7 @@ extern int trace_define_field(struct trace_event_call *call, const char *type, int is_signed, int filter_type); extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); +extern int trace_event_get_offsets(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2e08f8e9b771..58792fed5678 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -652,8 +652,12 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, enum bpf_access_type t) { if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t)) + env->prog->aux->ops->is_valid_access(off, size, t)) { + /* remember the offset of last byte accessed in ctx */ + if (env->prog->aux->max_ctx_offset < off + size) + env->prog->aux->max_ctx_offset = off + size; return 0; + } verbose("invalid bpf_context access off=%d size=%d\n", off, size); return -EACCES; diff --git a/kernel/events/core.c b/kernel/events/core.c index e5ffe97d6166..9a01019ff7c8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7133,6 +7133,14 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } + if (is_tracepoint) { + int off = trace_event_get_offsets(event->tp_event); + + if (prog->aux->max_ctx_offset > off) { + bpf_prog_put(prog); + return -EACCES; + } + } event->tp_event->prog = prog; return 0; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 05ddc0820771..ced963049e0a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -204,6 +204,24 @@ static void trace_destroy_fields(struct trace_event_call *call) } } +/* + * run-time version of trace_event_get_offsets_() that returns the last + * accessible offset of trace fields excluding __dynamic_array bytes + */ +int trace_event_get_offsets(struct trace_event_call *call) +{ + struct ftrace_event_field *tail; + struct list_head *head; + + head = trace_get_fields(call); + /* + * head->next points to the last field with the largest offset, + * since it was added last by trace_define_field() + */ + tail = list_first_entry(head, struct ftrace_event_field, link); + return tail->offset + tail->size; +} + int trace_event_raw_init(struct trace_event_call *call) { int id; -- cgit v1.2.3 From 1d111406c6d91f4d7f6cc69a43e59546e8010aae Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 20 Mar 2016 13:57:20 +0100 Subject: PCI: Add Intel Thunderbolt device IDs Intel Gen 1 and 2 chips use the same ID for NHI, bridges and switch. Gen 3 chips and onward use a distinct ID for the NHI. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Andreas Noever --- drivers/pci/quirks.c | 16 ++++++++++------ drivers/thunderbolt/nhi.c | 8 +++++--- drivers/thunderbolt/switch.c | 9 +++++---- include/linux/pci_ids.h | 18 ++++++++++++++++++ 4 files changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8e678027b900..b584ddf83555 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3232,7 +3232,8 @@ static void quirk_apple_poweroff_thunderbolt(struct pci_dev *dev) acpi_execute_simple_method(SXIO, NULL, 0); acpi_execute_simple_method(SXLV, NULL, 0); } -DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_INTEL, 0x1547, +DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C, quirk_apple_poweroff_thunderbolt); /* @@ -3266,9 +3267,10 @@ static void quirk_apple_wait_for_thunderbolt(struct pci_dev *dev) if (!nhi) goto out; if (nhi->vendor != PCI_VENDOR_ID_INTEL - || (nhi->device != 0x1547 && nhi->device != 0x156c) - || nhi->subsystem_vendor != 0x2222 - || nhi->subsystem_device != 0x1111) + || (nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && + nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI) + || nhi->subsystem_vendor != 0x2222 + || nhi->subsystem_device != 0x1111) goto out; dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n"); device_pm_wait_for_dev(&dev->dev, &nhi->dev); @@ -3276,9 +3278,11 @@ out: pci_dev_put(nhi); pci_dev_put(sibling); } -DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, 0x1547, +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C, quirk_apple_wait_for_thunderbolt); -DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, 0x156d, +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE, quirk_apple_wait_for_thunderbolt); #endif diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 20a41f7de76f..36be23babb89 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -633,16 +633,18 @@ static const struct dev_pm_ops nhi_pm_ops = { static struct pci_device_id nhi_ids[] = { /* * We have to specify class, the TB bridges use the same device and - * vendor (sub)id. + * vendor (sub)id on gen 1 and gen 2 controllers. */ { .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0, - .vendor = PCI_VENDOR_ID_INTEL, .device = 0x1547, + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C, .subvendor = 0x2222, .subdevice = 0x1111, }, { .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0, - .vendor = PCI_VENDOR_ID_INTEL, .device = 0x156c, + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, { 0,} diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index aeb982969629..db73ffed68a9 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -293,9 +293,9 @@ static int tb_plug_events_active(struct tb_switch *sw, bool active) if (active) { data = data & 0xFFFFFF83; switch (sw->config.device_id) { - case 0x1513: - case 0x151a: - case 0x1549: + case PCI_DEVICE_ID_INTEL_LIGHT_RIDGE: + case PCI_DEVICE_ID_INTEL_EAGLE_RIDGE: + case PCI_DEVICE_ID_INTEL_PORT_RIDGE: break; default: data |= 4; @@ -370,7 +370,8 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, u64 route) tb_sw_warn(sw, "unknown switch vendor id %#x\n", sw->config.vendor_id); - if (sw->config.device_id != 0x1547 && sw->config.device_id != 0x1549) + if (sw->config.device_id != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && + sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE) tb_sw_warn(sw, "unsupported switch device id %#x\n", sw->config.device_id); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 247da8c95860..c58752fe16c4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2604,6 +2604,24 @@ #define PCI_DEVICE_ID_INTEL_82441 0x1237 #define PCI_DEVICE_ID_INTEL_82380FB 0x124b #define PCI_DEVICE_ID_INTEL_82439 0x1250 +#define PCI_DEVICE_ID_INTEL_LIGHT_RIDGE 0x1513 /* Tbt 1 Gen 1 */ +#define PCI_DEVICE_ID_INTEL_EAGLE_RIDGE 0x151a +#define PCI_DEVICE_ID_INTEL_LIGHT_PEAK 0x151b +#define PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C 0x1547 /* Tbt 1 Gen 2 */ +#define PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_2C 0x1548 +#define PCI_DEVICE_ID_INTEL_PORT_RIDGE 0x1549 +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_NHI 0x1566 /* Tbt 1 Gen 3 */ +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_BRIDGE 0x1567 +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_NHI 0x1568 +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_BRIDGE 0x1569 +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI 0x156a /* Thunderbolt 2 */ +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE 0x156b +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI 0x156c +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE 0x156d +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_NHI 0x1575 /* Thunderbolt 3 */ +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_BRIDGE 0x1576 +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 -- cgit v1.2.3 From 6c9d9c81924b4b63c7a487e90fddb3b2d0f7d458 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Apr 2016 23:38:46 +0200 Subject: cpufreq: Call cpufreq_disable_fast_switch() in sugov_exit() Due to differences in the cpufreq core's handling of runtime CPU offline and nonboot CPUs disabling during system suspend-to-RAM, fast frequency switching gets disabled after a suspend-to-RAM and resume cycle on all of the nonboot CPUs. To prevent that from happening, move the invocation of cpufreq_disable_fast_switch() from cpufreq_exit_governor() to sugov_exit(), as the schedutil governor is the only user of fast frequency switching today anyway. That simply prevents cpufreq_disable_fast_switch() from being called without invoking the ->governor callback for the CPUFREQ_GOV_POLICY_EXIT event (which happens during system suspend now). Fixes: b7898fda5bc7 (cpufreq: Support for fast frequency switching) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 19 +++++++++++-------- include/linux/cpufreq.h | 1 + kernel/sched/cpufreq_schedutil.c | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a5b7d77d4816..77d77a4e3b74 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -77,7 +77,11 @@ static inline bool has_target(void) static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); -static int cpufreq_exit_governor(struct cpufreq_policy *policy); + +static inline int cpufreq_exit_governor(struct cpufreq_policy *policy) +{ + return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); +} /** * Two notifier lists: the "policy" list is involved in the @@ -482,7 +486,11 @@ void cpufreq_enable_fast_switch(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch); -static void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) +/** + * cpufreq_disable_fast_switch - Disable fast frequency switching for policy. + * @policy: cpufreq policy to disable fast frequency switching for. + */ +void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) { mutex_lock(&cpufreq_fast_switch_lock); if (policy->fast_switch_enabled) { @@ -492,6 +500,7 @@ static void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) } mutex_unlock(&cpufreq_fast_switch_lock); } +EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); /********************************************************************* * SYSFS INTERFACE * @@ -2060,12 +2069,6 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); } -static int cpufreq_exit_governor(struct cpufreq_policy *policy) -{ - cpufreq_disable_fast_switch(policy); - return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); -} - int cpufreq_register_governor(struct cpufreq_governor *governor) { int err; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 55e69ebb035c..4e81e08db752 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -168,6 +168,7 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); +void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); #else static inline unsigned int cpufreq_get(unsigned int cpu) { diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index d27ae064b476..154ae3a51e86 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -398,6 +398,8 @@ static int sugov_exit(struct cpufreq_policy *policy) struct sugov_tunables *tunables = sg_policy->tunables; unsigned int count; + cpufreq_disable_fast_switch(policy); + mutex_lock(&global_tunables_lock); count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); -- cgit v1.2.3 From 7e67e239a4f32fa3e2d51dd9a7930018651635b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 30 Mar 2016 13:45:25 +0530 Subject: cpufreq: dt: Include types.h from cpufreq-dt.h cpufreq-dt.h uses 'bool' data type but doesn't include types.h. It works fine for now as the files that include cpufreq-dt.h, also include types.h directly or indirectly. But, when a file includes cpufreq-dt.h without including types.h, we get a build error. Avoid such errors by including types.h in cpufreq-dt itself. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq-dt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h index 0414009e2c30..a87335a1660c 100644 --- a/include/linux/cpufreq-dt.h +++ b/include/linux/cpufreq-dt.h @@ -10,6 +10,8 @@ #ifndef __CPUFREQ_DT_H__ #define __CPUFREQ_DT_H__ +#include + struct cpufreq_dt_platform_data { /* * True when each CPU has its own clock to control its -- cgit v1.2.3 From 357f435d8a0d32068c75f3c7176434d992b3adb7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Apr 2016 19:05:19 -0400 Subject: fix the copy vs. map logics in blk_rq_map_user_iov() Signed-off-by: Al Viro --- block/blk-map.c | 47 ++++++++--------------------------------------- include/linux/uio.h | 1 + lib/iov_iter.c | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/block/blk-map.c b/block/blk-map.c index a54f0543b956..b9f88b7751fb 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,24 +9,6 @@ #include "blk.h" -static bool iovec_gap_to_prv(struct request_queue *q, - struct iovec *prv, struct iovec *cur) -{ - unsigned long prev_end; - - if (!queue_virt_boundary(q)) - return false; - - if (prv->iov_base == NULL && prv->iov_len == 0) - /* prv is not set - don't check */ - return false; - - prev_end = (unsigned long)(prv->iov_base + prv->iov_len); - - return (((unsigned long)cur->iov_base & queue_virt_boundary(q)) || - prev_end & queue_virt_boundary(q)); -} - int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio) { @@ -125,31 +107,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct rq_map_data *map_data, const struct iov_iter *iter, gfp_t gfp_mask) { - struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0}; - bool copy = (q->dma_pad_mask & iter->count) || map_data; + bool copy = false; + unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); struct bio *bio = NULL; struct iov_iter i; int ret; - if (!iter || !iter->count) - return -EINVAL; - - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - - if (!iov.iov_len) - return -EINVAL; - - /* - * Keep going so we check length of all segments - */ - if ((uaddr & queue_dma_alignment(q)) || - iovec_gap_to_prv(q, &prv, &iov)) - copy = true; - - prv.iov_base = iov.iov_base; - prv.iov_len = iov.iov_len; - } + if (map_data) + copy = true; + else if (iov_iter_alignment(iter) & align) + copy = true; + else if (queue_virt_boundary(q)) + copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); i = *iter; do { diff --git a/include/linux/uio.h b/include/linux/uio.h index fd9bcfedad42..1b5d1cd796e2 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -87,6 +87,7 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); +unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 5fecddc32b1b..ca5316e0087b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -569,6 +569,25 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_alignment); +unsigned long iov_iter_gap_alignment(const struct iov_iter *i) +{ + unsigned long res = 0; + size_t size = i->count; + if (!size) + return 0; + + iterate_all_kinds(i, size, v, + (res |= (!res ? 0 : (unsigned long)v.iov_base) | + (size != v.iov_len ? size : 0), 0), + (res |= (!res ? 0 : (unsigned long)v.bv_offset) | + (size != v.bv_len ? size : 0)), + (res |= (!res ? 0 : (unsigned long)v.iov_base) | + (size != v.iov_len ? size : 0)) + ); + return res; +} +EXPORT_SYMBOL(iov_iter_gap_alignment); + ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) -- cgit v1.2.3 From 1373718194ebebc43c00d8f117e03885749495b0 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 22 Mar 2016 08:51:10 +0800 Subject: ACPI / PM: Introduce efi poweroff for HW-full platforms without _S5 The problem is Linux registers pm_power_off = efi_power_off only if we are in hardware reduced mode. Actually, what we also want is to do this when ACPI S5 is simply not supported on non-legacy platforms. Since some future Intel platforms are HW-full mode where the DSDT fails to supply an _S5 object(without SLP_TYP), we should let such kind of platform to leverage efi runtime service to poweroff. This patch uses efi power off as first choice when S5 is unavailable, even if there is a customized poweroff(driver provided, eg). Meanwhile, the legacy platforms will not be affected because there is no path for them to overwrite the pm_power_off to efi power off. Suggested-by: Len Brown Reviewed-by: Matt Fleming Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- arch/x86/platform/efi/quirks.c | 2 +- drivers/acpi/sleep.c | 7 +++++++ include/linux/acpi.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index ab50ada1d56e..818d12ad7761 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -373,5 +373,5 @@ bool efi_reboot_required(void) bool efi_poweroff_required(void) { - return !!acpi_gbl_reduced_hardware; + return acpi_gbl_reduced_hardware || acpi_no_s5; } diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 2a8b59644297..7a2e4d45b266 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -26,6 +26,11 @@ #include "internal.h" #include "sleep.h" +/* + * Some HW-full platforms do not have _S5, so they may need + * to leverage efi power off for a shutdown. + */ +bool acpi_no_s5; static u8 sleep_states[ACPI_S_STATE_COUNT]; static void acpi_sleep_tts_switch(u32 acpi_state) @@ -882,6 +887,8 @@ int __init acpi_sleep_init(void) sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; + } else { + acpi_no_s5 = true; } supported[0] = 0; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 06ed7e54033e..4d2e67f633b6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -278,6 +278,7 @@ void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; extern unsigned int acpi_sci_irq; +extern bool acpi_no_s5; #define INVALID_ACPI_IRQ ((unsigned)-1) static inline bool acpi_sci_irq_valid(void) { -- cgit v1.2.3 From f4d05266032346531b9f889e26aa31a0cf2a9822 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 29 Mar 2016 14:52:23 +0300 Subject: device property: don't bother the drivers with struct property_set Since device_add_property_set() now always takes a copy of the property_set, and also since the fwnode type is always hard coded to be FWNODE_PDATA, there is no need for the drivers to deliver the entire struct property_set. The function can just create the instance of it on its own and bind the properties from the drivers to it on the spot. This renames device_add_property_set() to device_add_properties(). The function now takes struct property_entry as its parameter instead of struct property_set. Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Acked-by: Thierry Reding Acked-by: Lee Jones Signed-off-by: Heikki Krogerus Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-pxa/raumfeld.c | 12 ++++-------- arch/arm/mach-tegra/board-paz00.c | 6 +----- drivers/base/platform.c | 19 ++++++++++--------- drivers/base/property.c | 34 +++++++++++++++++++++------------- drivers/mfd/intel-lpss-acpi.c | 12 ++---------- drivers/mfd/intel-lpss-pci.c | 20 ++++---------------- drivers/mfd/intel-lpss.c | 2 +- drivers/mfd/intel-lpss.h | 4 ++-- drivers/mfd/mfd-core.c | 4 ++-- include/linux/mfd/core.h | 4 ++-- include/linux/platform_device.h | 6 +++--- include/linux/property.h | 15 +++------------ 12 files changed, 55 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index 5a941bd3dbed..e216433b56ed 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -385,10 +385,6 @@ static struct property_entry raumfeld_rotary_properties[] = { { }, }; -static struct property_set raumfeld_rotary_property_set = { - .properties = raumfeld_rotary_properties, -}; - static struct platform_device rotary_encoder_device = { .name = "rotary-encoder", .id = 0, @@ -1063,8 +1059,8 @@ static void __init __maybe_unused raumfeld_controller_init(void) pxa3xx_mfp_config(ARRAY_AND_SIZE(raumfeld_controller_pin_config)); gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); - device_add_property_set(&rotary_encoder_device.dev, - &raumfeld_rotary_property_set); + device_add_properties(&rotary_encoder_device.dev, + raumfeld_rotary_properties); platform_device_register(&rotary_encoder_device); spi_register_board_info(ARRAY_AND_SIZE(controller_spi_devices)); @@ -1103,8 +1099,8 @@ static void __init __maybe_unused raumfeld_speaker_init(void) platform_device_register(&smc91x_device); gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); - device_add_property_set(&rotary_encoder_device.dev, - &raumfeld_rotary_property_set); + device_add_properties(&rotary_encoder_device.dev, + raumfeld_rotary_properties); platform_device_register(&rotary_encoder_device); raumfeld_audio_init(); diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index 52db8bf7e153..7478f6fb3664 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -29,10 +29,6 @@ static struct property_entry __initdata wifi_rfkill_prop[] = { { }, }; -static struct property_set __initdata wifi_rfkill_pset = { - .properties = wifi_rfkill_prop, -}; - static struct platform_device wifi_rfkill_device = { .name = "rfkill_gpio", .id = -1, @@ -49,7 +45,7 @@ static struct gpiod_lookup_table wifi_gpio_lookup = { void __init tegra_paz00_wifikill_init(void) { - platform_device_add_properties(&wifi_rfkill_device, &wifi_rfkill_pset); + platform_device_add_properties(&wifi_rfkill_device, wifi_rfkill_prop); gpiod_add_lookup_table(&wifi_gpio_lookup); platform_device_register(&wifi_rfkill_device); } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f437afa17f2b..6482d47deb50 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -322,16 +322,16 @@ EXPORT_SYMBOL_GPL(platform_device_add_data); /** * platform_device_add_properties - add built-in properties to a platform device * @pdev: platform device to add properties to - * @pset: properties to add + * @properties: null terminated array of properties to add * - * The function will take deep copy of the properties in @pset and attach - * the copy to the platform device. The memory associated with properties - * will be freed when the platform device is released. + * The function will take deep copy of @properties and attach the copy to the + * platform device. The memory associated with properties will be freed when the + * platform device is released. */ int platform_device_add_properties(struct platform_device *pdev, - const struct property_set *pset) + struct property_entry *properties) { - return device_add_property_set(&pdev->dev, pset); + return device_add_properties(&pdev->dev, properties); } EXPORT_SYMBOL_GPL(platform_device_add_properties); @@ -447,7 +447,7 @@ void platform_device_del(struct platform_device *pdev) release_resource(r); } - device_remove_property_set(&pdev->dev); + device_remove_properties(&pdev->dev); } } EXPORT_SYMBOL_GPL(platform_device_del); @@ -526,8 +526,9 @@ struct platform_device *platform_device_register_full( if (ret) goto err; - if (pdevinfo->pset) { - ret = platform_device_add_properties(pdev, pdevinfo->pset); + if (pdevinfo->properties) { + ret = platform_device_add_properties(pdev, + pdevinfo->properties); if (ret) goto err; } diff --git a/drivers/base/property.c b/drivers/base/property.c index 9b1a65debd49..210423d00d78 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -19,6 +19,11 @@ #include #include +struct property_set { + struct fwnode_handle fwnode; + struct property_entry *properties; +}; + static inline bool is_pset_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_PDATA; @@ -801,14 +806,14 @@ static struct property_set *pset_copy_set(const struct property_set *pset) } /** - * device_remove_property_set - Remove properties from a device object. + * device_remove_properties - Remove properties from a device object. * @dev: Device whose properties to remove. * * The function removes properties previously associated to the device - * secondary firmware node with device_add_property_set(). Memory allocated + * secondary firmware node with device_add_properties(). Memory allocated * to the properties will also be released. */ -void device_remove_property_set(struct device *dev) +void device_remove_properties(struct device *dev) { struct fwnode_handle *fwnode; @@ -831,24 +836,27 @@ void device_remove_property_set(struct device *dev) } } } -EXPORT_SYMBOL_GPL(device_remove_property_set); +EXPORT_SYMBOL_GPL(device_remove_properties); /** - * device_add_property_set - Add a collection of properties to a device object. + * device_add_properties - Add a collection of properties to a device object. * @dev: Device to add properties to. - * @pset: Collection of properties to add. + * @properties: Collection of properties to add. * - * Associate a collection of device properties represented by @pset with @dev - * as its secondary firmware node. The function takes a copy of @pset. + * Associate a collection of device properties represented by @properties with + * @dev as its secondary firmware node. The function takes a copy of + * @properties. */ -int device_add_property_set(struct device *dev, const struct property_set *pset) +int device_add_properties(struct device *dev, struct property_entry *properties) { - struct property_set *p; + struct property_set *p, pset; - if (!pset) + if (!properties) return -EINVAL; - p = pset_copy_set(pset); + pset.properties = properties; + + p = pset_copy_set(&pset); if (IS_ERR(p)) return PTR_ERR(p); @@ -856,7 +864,7 @@ int device_add_property_set(struct device *dev, const struct property_set *pset) set_secondary_fwnode(dev, &p->fwnode); return 0; } -EXPORT_SYMBOL_GPL(device_add_property_set); +EXPORT_SYMBOL_GPL(device_add_properties); /** * device_get_next_child_node - Return the next child node handle for a device diff --git a/drivers/mfd/intel-lpss-acpi.c b/drivers/mfd/intel-lpss-acpi.c index 5a8d9c766633..7ddc4a9563ea 100644 --- a/drivers/mfd/intel-lpss-acpi.c +++ b/drivers/mfd/intel-lpss-acpi.c @@ -31,13 +31,9 @@ static struct property_entry spt_i2c_properties[] = { { }, }; -static struct property_set spt_i2c_pset = { - .properties = spt_i2c_properties, -}; - static const struct intel_lpss_platform_info spt_i2c_info = { .clk_rate = 120000000, - .pset = &spt_i2c_pset, + .properties = spt_i2c_properties, }; static const struct intel_lpss_platform_info bxt_info = { @@ -51,13 +47,9 @@ static struct property_entry bxt_i2c_properties[] = { { }, }; -static struct property_set bxt_i2c_pset = { - .properties = bxt_i2c_properties, -}; - static const struct intel_lpss_platform_info bxt_i2c_info = { .clk_rate = 133000000, - .pset = &bxt_i2c_pset, + .properties = bxt_i2c_properties, }; static const struct acpi_device_id intel_lpss_acpi_ids[] = { diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index a19e57118641..1d79a3c9370f 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -71,13 +71,9 @@ static struct property_entry spt_i2c_properties[] = { { }, }; -static struct property_set spt_i2c_pset = { - .properties = spt_i2c_properties, -}; - static const struct intel_lpss_platform_info spt_i2c_info = { .clk_rate = 120000000, - .pset = &spt_i2c_pset, + .properties = spt_i2c_properties, }; static struct property_entry uart_properties[] = { @@ -87,14 +83,10 @@ static struct property_entry uart_properties[] = { { }, }; -static struct property_set uart_pset = { - .properties = uart_properties, -}; - static const struct intel_lpss_platform_info spt_uart_info = { .clk_rate = 120000000, .clk_con_id = "baudclk", - .pset = &uart_pset, + .properties = uart_properties, }; static const struct intel_lpss_platform_info bxt_info = { @@ -104,7 +96,7 @@ static const struct intel_lpss_platform_info bxt_info = { static const struct intel_lpss_platform_info bxt_uart_info = { .clk_rate = 100000000, .clk_con_id = "baudclk", - .pset = &uart_pset, + .properties = uart_properties, }; static struct property_entry bxt_i2c_properties[] = { @@ -114,13 +106,9 @@ static struct property_entry bxt_i2c_properties[] = { { }, }; -static struct property_set bxt_i2c_pset = { - .properties = bxt_i2c_properties, -}; - static const struct intel_lpss_platform_info bxt_i2c_info = { .clk_rate = 133000000, - .pset = &bxt_i2c_pset, + .properties = bxt_i2c_properties, }; static const struct pci_device_id intel_lpss_pci_ids[] = { diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 1bbbe877ba7e..6352aaba96a4 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -407,7 +407,7 @@ int intel_lpss_probe(struct device *dev, if (ret) return ret; - lpss->cell->pset = info->pset; + lpss->cell->properties = info->properties; intel_lpss_init_dev(lpss); diff --git a/drivers/mfd/intel-lpss.h b/drivers/mfd/intel-lpss.h index 0dcea9eb2d03..694116630ffa 100644 --- a/drivers/mfd/intel-lpss.h +++ b/drivers/mfd/intel-lpss.h @@ -16,14 +16,14 @@ struct device; struct resource; -struct property_set; +struct property_entry; struct intel_lpss_platform_info { struct resource *mem; int irq; unsigned long clk_rate; const char *clk_con_id; - struct property_set *pset; + struct property_entry *properties; }; int intel_lpss_probe(struct device *dev, diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 88bd1b1e47be..fc1c1fc13813 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -193,8 +193,8 @@ static int mfd_add_device(struct device *parent, int id, goto fail_alias; } - if (cell->pset) { - ret = platform_device_add_properties(pdev, cell->pset); + if (cell->properties) { + ret = platform_device_add_properties(pdev, cell->properties); if (ret) goto fail_alias; } diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bc6f7e00fb3d..9837f1e8c94c 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -17,7 +17,7 @@ #include struct irq_domain; -struct property_set; +struct property_entry; /* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ struct mfd_cell_acpi_match { @@ -47,7 +47,7 @@ struct mfd_cell { size_t pdata_size; /* device properties passed to the sub devices drivers */ - const struct property_set *pset; + struct property_entry *properties; /* * Device Tree compatible string diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 03b755521fd9..98c2a7c7108e 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -18,7 +18,7 @@ #define PLATFORM_DEVID_AUTO (-2) struct mfd_cell; -struct property_set; +struct property_entry; struct platform_device { const char *name; @@ -73,7 +73,7 @@ struct platform_device_info { size_t size_data; u64 dma_mask; - const struct property_set *pset; + struct property_entry *properties; }; extern struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo); @@ -172,7 +172,7 @@ extern int platform_device_add_resources(struct platform_device *pdev, extern int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size); extern int platform_device_add_properties(struct platform_device *pdev, - const struct property_set *pset); + struct property_entry *properties); extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); diff --git a/include/linux/property.h b/include/linux/property.h index b51fcd36d892..ecab11e40794 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -238,18 +238,9 @@ struct property_entry { .name = _name_, \ } -/** - * struct property_set - Collection of "built-in" device properties. - * @fwnode: Handle to be pointed to by the fwnode field of struct device. - * @properties: Array of properties terminated with a null entry. - */ -struct property_set { - struct fwnode_handle fwnode; - struct property_entry *properties; -}; - -int device_add_property_set(struct device *dev, const struct property_set *pset); -void device_remove_property_set(struct device *dev); +int device_add_properties(struct device *dev, + struct property_entry *properties); +void device_remove_properties(struct device *dev); bool device_dma_supported(struct device *dev); -- cgit v1.2.3 From c68ae33e7fb4a010f9a48af3e4b87089dca96551 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 24 Mar 2016 13:15:20 +0100 Subject: ACPI / utils: Rename acpi_dev_present() acpi_dev_present() was originally named after pci_dev_present() to signify the similarity of the two functions. However Rafael J. Wysocki pointed out that the exported function acpi_dev_present() is easily confused with the non-exported acpi_device_is_present(). Additionally in ACPI parlance the term "present" usually refers to the "device is present" bit returned by the _STA control method, yet acpi_dev_present() merely checks presence in the namespace. It does not invoke _STA at all, let alone check the "device is present" bit. As suggested by Rafael, rename the function to acpi_dev_found() and adjust all existing call sites. Signed-off-by: Lukas Wunner Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 6 +++--- include/acpi/acpi_bus.h | 2 +- include/linux/apple-gmux.h | 2 +- sound/pci/hda/thinkpad_helper.c | 2 +- sound/soc/intel/boards/cht_bsw_max98090_ti.c | 2 +- sound/soc/intel/boards/cht_bsw_rt5645.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 050673f0c0b3..ac832bf6f8c9 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -707,7 +707,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, int rev, u64 funcs) EXPORT_SYMBOL(acpi_check_dsm); /** - * acpi_dev_present - Detect presence of a given ACPI device in the system. + * acpi_dev_found - Detect presence of a given ACPI device in the namespace. * @hid: Hardware ID of the device. * * Return %true if the device was present at the moment of invocation. @@ -719,7 +719,7 @@ EXPORT_SYMBOL(acpi_check_dsm); * instead). Calling from module_init() is fine (which is synonymous * with device_initcall()). */ -bool acpi_dev_present(const char *hid) +bool acpi_dev_found(const char *hid) { struct acpi_device_bus_id *acpi_device_bus_id; bool found = false; @@ -734,7 +734,7 @@ bool acpi_dev_present(const char *hid) return found; } -EXPORT_SYMBOL(acpi_dev_present); +EXPORT_SYMBOL(acpi_dev_found); /* * acpi_backlight= handling, this is done here rather then in video_detect.c diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 14362a84c78e..a84fd1533e24 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -87,7 +87,7 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, int rev, int func, .package.elements = (eles) \ } -bool acpi_dev_present(const char *hid); +bool acpi_dev_found(const char *hid); #ifdef CONFIG_ACPI diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h index b2d32e01dfe4..714186de8c36 100644 --- a/include/linux/apple-gmux.h +++ b/include/linux/apple-gmux.h @@ -35,7 +35,7 @@ */ static inline bool apple_gmux_present(void) { - return acpi_dev_present(GMUX_ACPI_HID); + return acpi_dev_found(GMUX_ACPI_HID); } #else /* !CONFIG_APPLE_GMUX */ diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index 59ab6cee1ad8..f0955fd7a2e7 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -13,7 +13,7 @@ static void (*old_vmaster_hook)(void *, int); static bool is_thinkpad(struct hda_codec *codec) { return (codec->core.subsystem_id >> 16 == 0x17aa) && - (acpi_dev_present("LEN0068") || acpi_dev_present("IBM0068")); + (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068")); } static void update_tpacpi_mute_led(void *private_data, int enabled) diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c index e609f089593a..ac60b04540fd 100644 --- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c +++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c @@ -296,7 +296,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) if (!drv) return -ENOMEM; - drv->ts3a227e_present = acpi_dev_present("104C227E"); + drv->ts3a227e_present = acpi_dev_found("104C227E"); if (!drv->ts3a227e_present) { /* no need probe TI jack detection chip */ snd_soc_card_cht.aux_dev = NULL; diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 2a6f80843bc9..3f2c1ea3a83e 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -357,7 +357,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(snd_soc_cards); i++) { - if (acpi_dev_present(snd_soc_cards[i].codec_id)) { + if (acpi_dev_found(snd_soc_cards[i].codec_id)) { dev_dbg(&pdev->dev, "found codec %s\n", snd_soc_cards[i].codec_id); card = snd_soc_cards[i].soc_card; -- cgit v1.2.3 From 32b9b10961860860268961d9aad0c56a73018c37 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Feb 2016 13:19:09 -0800 Subject: clk: Allow clocks to be marked as CRITICAL Critical clocks are those which must not be gated, else undefined or catastrophic failure would occur. Here we have chosen to ensure the prepare/enable counts are correctly incremented, so as not to confuse users with enabled clocks with no visible users. Signed-off-by: Lee Jones Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/1455225554-13267-2-git-send-email-mturquette@baylibre.com --- drivers/clk/clk.c | 5 +++++ include/linux/clk-provider.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index fb74dc1f7520..275201fd7b01 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2397,6 +2397,11 @@ static int __clk_core_init(struct clk_core *core) if (core->ops->init) core->ops->init(core->hw); + if (core->flags & CLK_IS_CRITICAL) { + clk_core_prepare(core); + clk_core_enable(core); + } + kref_init(&core->ref); out: clk_prepare_unlock(); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index da95258127aa..0638b4154502 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -32,6 +32,7 @@ #define CLK_GET_ACCURACY_NOCACHE BIT(8) /* do not use the cached clk accuracy */ #define CLK_RECALC_NEW_RATES BIT(9) /* recalc rates after notifications */ #define CLK_SET_RATE_UNGATE BIT(10) /* clock needs to run to set rate */ +#define CLK_IS_CRITICAL BIT(11) /* do not gate, ever */ struct clk; struct clk_hw; -- cgit v1.2.3 From d56f8994b6fb928f59481fabc25bcd1c2f9bd06d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Feb 2016 13:19:11 -0800 Subject: clk: Provide OF helper to mark clocks as CRITICAL This call matches clocks which have been marked as critical in DT and sets the appropriate flag. These flags can then be used to mark the clock core flags appropriately prior to registration. Legacy bindings requiring this feature must add the clock-critical property to their binding descriptions, as it is not a part of common-clock binding. Cc: devicetree@vger.kernel.org Signed-off-by: Lee Jones Reviewed-by: Stephen Boyd Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/1455225554-13267-4-git-send-email-mturquette@baylibre.com --- drivers/clk/clk.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 8 +++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index dede0ce679e4..9f77cc67cdc3 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3137,6 +3137,41 @@ static int parent_ready(struct device_node *np) } } +/** + * of_clk_detect_critical() - set CLK_IS_CRITICAL flag from Device Tree + * @np: Device node pointer associated with clock provider + * @index: clock index + * @flags: pointer to clk_core->flags + * + * Detects if the clock-critical property exists and, if so, sets the + * corresponding CLK_IS_CRITICAL flag. + * + * Do not use this function. It exists only for legacy Device Tree + * bindings, such as the one-clock-per-node style that are outdated. + * Those bindings typically put all clock data into .dts and the Linux + * driver has no clock data, thus making it impossible to set this flag + * correctly from the driver. Only those drivers may call + * of_clk_detect_critical from their setup functions. + * + * Return: error code or zero on success + */ +int of_clk_detect_critical(struct device_node *np, + int index, unsigned long *flags) +{ + struct property *prop; + const __be32 *cur; + uint32_t idx; + + if (!np || !flags) + return -EINVAL; + + of_property_for_each_u32(np, "clock-critical", prop, cur, idx) + if (index == idx) + *flags |= CLK_IS_CRITICAL; + + return 0; +} + /** * of_clk_init() - Scan and init clock providers from the DT * @matches: array of compatible values and init functions for providers. diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0638b4154502..156286445a25 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -721,7 +721,8 @@ unsigned int of_clk_get_parent_count(struct device_node *np); int of_clk_parent_fill(struct device_node *np, const char **parents, unsigned int size); const char *of_clk_get_parent_name(struct device_node *np, int index); - +int of_clk_detect_critical(struct device_node *np, int index, + unsigned long *flags); void of_clk_init(const struct of_device_id *matches); #else /* !CONFIG_OF */ @@ -758,6 +759,11 @@ static inline const char *of_clk_get_parent_name(struct device_node *np, { return NULL; } +static inline int of_clk_detect_critical(struct device_node *np, int index, + unsigned long *flags) +{ + return 0; +} static inline void of_clk_init(const struct of_device_id *matches) {} #endif /* CONFIG_OF */ -- cgit v1.2.3 From b296821a7c42fa58baa17513b2b7b30ae66f3336 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Apr 2016 20:48:24 -0400 Subject: xattr_handler: pass dentry and inode as separate arguments of ->get() ... and do not assume they are already attached to each other Signed-off-by: Al Viro --- fs/9p/acl.c | 6 +++--- fs/9p/xattr.c | 4 ++-- fs/btrfs/xattr.c | 6 ++---- fs/ext2/xattr_security.c | 6 +++--- fs/ext2/xattr_trusted.c | 6 +++--- fs/ext2/xattr_user.c | 8 ++++---- fs/ext4/xattr_security.c | 6 +++--- fs/ext4/xattr_trusted.c | 6 +++--- fs/ext4/xattr_user.c | 8 ++++---- fs/f2fs/xattr.c | 14 ++++++-------- fs/gfs2/xattr.c | 6 +++--- fs/hfsplus/xattr.c | 10 +++++----- fs/hfsplus/xattr.h | 2 +- fs/hfsplus/xattr_security.c | 6 +++--- fs/hfsplus/xattr_trusted.c | 6 +++--- fs/hfsplus/xattr_user.c | 6 +++--- fs/jffs2/security.c | 6 +++--- fs/jffs2/xattr_trusted.c | 6 +++--- fs/jffs2/xattr_user.c | 6 +++--- fs/nfs/nfs4proc.c | 12 ++++++------ fs/ocfs2/xattr.c | 20 ++++++++++---------- fs/orangefs/xattr.c | 10 ++++++---- fs/posix_acl.c | 10 +++++----- fs/reiserfs/xattr_security.c | 9 ++++----- fs/reiserfs/xattr_trusted.c | 9 ++++----- fs/reiserfs/xattr_user.c | 9 ++++----- fs/squashfs/xattr.c | 6 ++++-- fs/xattr.c | 3 ++- fs/xfs/xfs_xattr.c | 6 +++--- include/linux/xattr.h | 3 ++- mm/shmem.c | 6 +++--- 31 files changed, 113 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 2d94e94b6b59..eb3589edf485 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -213,8 +213,8 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, } static int v9fs_xattr_get_acl(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { struct v9fs_session_info *v9ses; struct posix_acl *acl; @@ -227,7 +227,7 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler, if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) return v9fs_xattr_get(dentry, handler->name, buffer, size); - acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags); + acl = v9fs_get_cached_acl(inode, handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 9dd9b47a6c1a..18c62bae9591 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -138,8 +138,8 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } static int v9fs_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { const char *full_name = xattr_full_name(handler, name); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 145d2b89e62d..03224b00ea70 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -369,11 +369,9 @@ err: } static int btrfs_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - name = xattr_full_name(handler, name); return __btrfs_getxattr(inode, name, buffer, size); } diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index ba97f243b050..7fd3b867ce65 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -9,10 +9,10 @@ static int ext2_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, buffer, size); } diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 2c94d1930626..0f85705ff519 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -16,10 +16,10 @@ ext2_xattr_trusted_list(struct dentry *dentry) static int ext2_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, buffer, size); } diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 72a2a96d677f..1fafd27037cc 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -18,12 +18,12 @@ ext2_xattr_user_list(struct dentry *dentry) static int ext2_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); } diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 3e81bdca071a..123a7d010efe 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -13,10 +13,10 @@ static int ext4_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, buffer, size); } diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 2a3c6f9b8cb8..60652fa24cbc 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -20,10 +20,10 @@ ext4_xattr_trusted_list(struct dentry *dentry) static int ext4_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, buffer, size); } diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index d152f431e432..17a446ffecd3 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -19,12 +19,12 @@ ext4_xattr_user_list(struct dentry *dentry) static int ext4_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 06a72dc0191a..17fd2b1a6848 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -26,10 +26,10 @@ #include "xattr.h" static int f2fs_xattr_generic_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); switch (handler->flags) { case F2FS_XATTR_INDEX_USER: @@ -45,7 +45,7 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, default: return -EINVAL; } - return f2fs_getxattr(d_inode(dentry), handler->flags, name, + return f2fs_getxattr(inode, handler->flags, name, buffer, size, NULL); } @@ -86,11 +86,9 @@ static bool f2fs_xattr_trusted_list(struct dentry *dentry) } static int f2fs_xattr_advise_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - if (buffer) *((char *)buffer) = F2FS_I(inode)->i_advise; return sizeof(char); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index e8dfb4740c04..619886ba6e78 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -584,10 +584,10 @@ out: * Returns: actual size of data on success, -errno on error */ static int gfs2_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); + struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_ea_location el; int type = handler->flags; int error; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index ab01530b4930..45dc4ae3791a 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -579,7 +579,7 @@ failed_getxattr_init: return res; } -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size, const char *prefix, size_t prefixlen) { @@ -594,7 +594,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, strcpy(xattr_name, prefix); strcpy(xattr_name + prefixlen, name); - res = __hfsplus_getxattr(d_inode(dentry), xattr_name, value, size); + res = __hfsplus_getxattr(inode, xattr_name, value, size); kfree(xattr_name); return res; @@ -844,8 +844,8 @@ end_removexattr: } static int hfsplus_osx_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { /* * Don't allow retrieving properly prefixed attributes @@ -860,7 +860,7 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler, * creates), so we pass the name through unmodified (after * ensuring it doesn't conflict with another namespace). */ - return __hfsplus_getxattr(d_inode(dentry), name, buffer, size); + return __hfsplus_getxattr(inode, name, buffer, size); } static int hfsplus_osx_setxattr(const struct xattr_handler *handler, diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h index f9b0955b3d28..d04ba6f58df2 100644 --- a/fs/hfsplus/xattr.h +++ b/fs/hfsplus/xattr.h @@ -28,7 +28,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size); -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size, const char *prefix, size_t prefixlen); diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index 72a68a3a0c99..ae2ca8c2e335 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -14,10 +14,10 @@ #include "acl.h" static int hfsplus_security_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return hfsplus_getxattr(dentry, name, buffer, size, + return hfsplus_getxattr(inode, name, buffer, size, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index 95a7704c7abb..eae2947060aa 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -12,10 +12,10 @@ #include "xattr.h" static int hfsplus_trusted_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return hfsplus_getxattr(dentry, name, buffer, size, + return hfsplus_getxattr(inode, name, buffer, size, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index 6fc269baf959..3c9eec3e4c7b 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -12,11 +12,11 @@ #include "xattr.h" static int hfsplus_user_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return hfsplus_getxattr(dentry, name, buffer, size, + return hfsplus_getxattr(inode, name, buffer, size, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index 7a28facd7175..3ed9a4b49778 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c @@ -49,10 +49,10 @@ int jffs2_init_security(struct inode *inode, struct inode *dir, /* ---- XATTR Handler for "security.*" ----------------- */ static int jffs2_security_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY, + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); } diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index b2555ef07a12..4ebecff1d922 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -17,10 +17,10 @@ #include "nodelist.h" static int jffs2_trusted_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED, + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); } diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index 539bd630b5e4..bce249e1b277 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -17,10 +17,10 @@ #include "nodelist.h" static int jffs2_user_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER, + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 327b8c34d360..7a7ac1dafa02 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6263,10 +6263,10 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, } static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, - struct dentry *dentry, const char *key, - void *buf, size_t buflen) + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { - return nfs4_proc_get_acl(d_inode(dentry), buf, buflen); + return nfs4_proc_get_acl(inode, buf, buflen); } static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) @@ -6288,11 +6288,11 @@ static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, } static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler, - struct dentry *dentry, const char *key, - void *buf, size_t buflen) + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { if (security_ismaclabel(key)) - return nfs4_get_security_label(d_inode(dentry), buf, buflen); + return nfs4_get_security_label(inode, buf, buflen); return -EOPNOTSUPP; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 7d3d979f57d9..72eef4cfe8a9 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7250,10 +7250,10 @@ leave: * 'security' attributes support */ static int ocfs2_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, buffer, size); } @@ -7321,10 +7321,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = { * 'trusted' attributes support */ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, buffer, size); } @@ -7346,14 +7346,14 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = { * 'user' attributes support */ static int ocfs2_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unusde, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return -EOPNOTSUPP; - return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name, + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, buffer, size); } diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index ef5da7538cd5..6a4c0f7ce5c1 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -478,12 +478,13 @@ static int orangefs_xattr_set_default(const struct xattr_handler *handler, } static int orangefs_xattr_get_default(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - return orangefs_inode_getxattr(dentry->d_inode, + return orangefs_inode_getxattr(inode, ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, name, buffer, @@ -507,12 +508,13 @@ static int orangefs_xattr_set_trusted(const struct xattr_handler *handler, } static int orangefs_xattr_get_trusted(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - return orangefs_inode_getxattr(dentry->d_inode, + return orangefs_inode_getxattr(inode, ORANGEFS_XATTR_NAME_TRUSTED_PREFIX, name, buffer, diff --git a/fs/posix_acl.c b/fs/posix_acl.c index db1fb0f5d9ff..2c60f17e7d92 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -797,18 +797,18 @@ EXPORT_SYMBOL (posix_acl_to_xattr); static int posix_acl_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *value, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *value, size_t size) { struct posix_acl *acl; int error; - if (!IS_POSIXACL(d_backing_inode(dentry))) + if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; - if (d_is_symlink(dentry)) + if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - acl = get_acl(d_backing_inode(dentry), handler->flags); + acl = get_acl(inode, handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index ac7e104ada6b..86aeb9dd805a 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -9,14 +9,13 @@ #include static int -security_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *buffer, size_t size) +security_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - if (IS_PRIVATE(d_inode(dentry))) + if (IS_PRIVATE(inode)) return -EPERM; - return reiserfs_xattr_get(d_inode(dentry), - xattr_full_name(handler, name), + return reiserfs_xattr_get(inode, xattr_full_name(handler, name), buffer, size); } diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index cc248a581b60..31837f031f59 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -8,14 +8,13 @@ #include static int -trusted_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *buffer, size_t size) +trusted_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry))) + if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) return -EPERM; - return reiserfs_xattr_get(d_inode(dentry), - xattr_full_name(handler, name), + return reiserfs_xattr_get(inode, xattr_full_name(handler, name), buffer, size); } diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index caad583086af..f7c39731684b 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -7,13 +7,12 @@ #include static int -user_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *buffer, size_t size) +user_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - if (!reiserfs_xattrs_user(dentry->d_sb)) + if (!reiserfs_xattrs_user(inode->i_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_get(d_inode(dentry), - xattr_full_name(handler, name), + return reiserfs_xattr_get(inode, xattr_full_name(handler, name), buffer, size); } diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index 1e9de96288d8..1548b3784548 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c @@ -214,10 +214,12 @@ failed: static int squashfs_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *d, const char *name, + struct dentry *unused, + struct inode *inode, + const char *name, void *buffer, size_t size) { - return squashfs_xattr_get(d_inode(d), handler->flags, name, + return squashfs_xattr_get(inode, handler->flags, name, buffer, size); } diff --git a/fs/xattr.c b/fs/xattr.c index 4861322e28e8..461ba45b7da9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -698,7 +698,8 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (IS_ERR(handler)) return PTR_ERR(handler); - return handler->get(handler, dentry, name, buffer, size); + return handler->get(handler, dentry, d_inode(dentry), + name, buffer, size); } /* diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 110f1d7d86b0..d111f691f313 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -32,11 +32,11 @@ static int -xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *value, size_t size) +xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *value, size_t size) { int xflags = handler->flags; - struct xfs_inode *ip = XFS_I(d_inode(dentry)); + struct xfs_inode *ip = XFS_I(inode); int error, asize = size; /* Convert Linux syscall to XFS internal ATTR flags */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4457541de3c9..c11c022298b9 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -30,7 +30,8 @@ struct xattr_handler { int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, - const char *name, void *buffer, size_t size); + struct inode *inode, const char *name, void *buffer, + size_t size); int (*set)(const struct xattr_handler *, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags); diff --git a/mm/shmem.c b/mm/shmem.c index 9428c51ab2d6..00d5d025eece 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2646,10 +2646,10 @@ static int shmem_initxattrs(struct inode *inode, } static int shmem_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); + struct shmem_inode_info *info = SHMEM_I(inode); name = xattr_full_name(handler, name); return simple_xattr_get(&info->xattrs, name, buffer, size); -- cgit v1.2.3 From 3c9d6296b7aee536a96ea2b53a15d23511738c1c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 8 Apr 2016 12:20:30 +0200 Subject: security: drop the unused hook skb_owned_by The skb_owned_by hook was added with the commit ca10b9e9a8ca ("selinux: add a skb_owned_by() hook") and later removed when said commit was reverted. Later on, when switching to list of hooks, a field named 'skb_owned_by' was included into the security_hook_head struct, but without any users nor caller. This commit removes the said left-over field. Fixes: b1d9e6b0646d ("LSM: Switch to lists of hooks") Signed-off-by: Paolo Abeni Acked-by: Casey Schaufler Acked-by: Paul Moore Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 1 - security/security.c | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cdee11cbcdf1..ae2537886177 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1804,7 +1804,6 @@ struct security_hook_heads { struct list_head tun_dev_attach_queue; struct list_head tun_dev_attach; struct list_head tun_dev_open; - struct list_head skb_owned_by; #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM struct list_head xfrm_policy_alloc_security; diff --git a/security/security.c b/security/security.c index 3644b0344d29..554c3fb7d4a5 100644 --- a/security/security.c +++ b/security/security.c @@ -1848,7 +1848,6 @@ struct security_hook_heads security_hook_heads = { .tun_dev_attach = LIST_HEAD_INIT(security_hook_heads.tun_dev_attach), .tun_dev_open = LIST_HEAD_INIT(security_hook_heads.tun_dev_open), - .skb_owned_by = LIST_HEAD_INIT(security_hook_heads.skb_owned_by), #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = -- cgit v1.2.3 From ce23e640133484eebc20ca7b7668388213e11327 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Apr 2016 00:48:00 -0400 Subject: ->getxattr(): pass dentry and inode as separate arguments Signed-off-by: Al Viro --- Documentation/filesystems/porting | 6 ++++++ .../staging/lustre/lustre/llite/llite_internal.h | 4 ++-- drivers/staging/lustre/lustre/llite/xattr.c | 6 ++---- fs/bad_inode.c | 4 ++-- fs/ceph/super.h | 2 +- fs/ceph/xattr.c | 8 ++++---- fs/cifs/cifsfs.h | 2 +- fs/cifs/xattr.c | 6 +++--- fs/ecryptfs/crypto.c | 5 ++++- fs/ecryptfs/ecryptfs_kernel.h | 4 ++-- fs/ecryptfs/inode.c | 23 +++++++++++----------- fs/ecryptfs/mmap.c | 3 ++- fs/fuse/dir.c | 5 ++--- fs/gfs2/inode.c | 9 ++++----- fs/hfs/attr.c | 5 ++--- fs/hfs/hfs_fs.h | 4 ++-- fs/jfs/jfs_xattr.h | 2 +- fs/jfs/xattr.c | 8 ++++---- fs/kernfs/inode.c | 6 +++--- fs/kernfs/kernfs-internal.h | 4 ++-- fs/libfs.c | 4 ++-- fs/overlayfs/inode.c | 4 ++-- fs/overlayfs/overlayfs.h | 4 ++-- fs/overlayfs/super.c | 2 +- fs/ubifs/ubifs.h | 4 ++-- fs/ubifs/xattr.c | 6 +++--- fs/xattr.c | 11 ++++++----- include/linux/fs.h | 3 ++- include/linux/xattr.h | 2 +- net/socket.c | 2 +- security/commoncap.c | 6 +++--- security/integrity/evm/evm_main.c | 2 +- security/selinux/hooks.c | 9 +++++---- security/smack/smack_lsm.c | 4 ++-- 34 files changed, 94 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index f1b87d8aa2da..57bb3754a027 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -525,3 +525,9 @@ in your dentry operations instead. set_delayed_call() where it used to set *cookie. ->put_link() is gone - just give the destructor to set_delayed_call() in ->get_link(). +-- +[mandatory] + ->getxattr() and xattr_handler.get() get dentry and inode passed separately. + dentry might be yet to be attached to inode, so do _not_ use its ->d_inode + in the instances. Rationale: !@#!@# security_d_instantiate() needs to be + called before we attach dentry to inode. diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 3e1572cb457b..d28efd27af57 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -1042,8 +1042,8 @@ static inline __u64 ll_file_maxbytes(struct inode *inode) /* llite/xattr.c */ int ll_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size); ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index b68dcc921ca2..c671f221c28c 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -451,11 +451,9 @@ out: return rc; } -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 103f5d7c3083..72e35b721608 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -106,8 +106,8 @@ static int bad_inode_setxattr(struct dentry *dentry, const char *name, return -EIO; } -static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +static ssize_t bad_inode_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { return -EIO; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e705c4d612d7..beb893bb234f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -795,7 +795,7 @@ extern int ceph_setxattr(struct dentry *, const char *, const void *, int __ceph_setxattr(struct dentry *, const char *, const void *, size_t, int); ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); int __ceph_removexattr(struct dentry *, const char *); -extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t ceph_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); extern int ceph_removexattr(struct dentry *, const char *); extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9410abdef3ce..c6e917d360f7 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -804,13 +804,13 @@ out: return err; } -ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size) +ssize_t ceph_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return generic_getxattr(dentry, name, value, size); + return generic_getxattr(dentry, inode, name, value, size); - return __ceph_getxattr(d_inode(dentry), name, value, size); + return __ceph_getxattr(inode, name, value, size); } ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 83aac8ba50b0..c89ecd7a5c39 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -123,7 +123,7 @@ extern int cifs_symlink(struct inode *inode, struct dentry *direntry, extern int cifs_removexattr(struct dentry *, const char *); extern int cifs_setxattr(struct dentry *, const char *, const void *, size_t, int); -extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t cifs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); #ifdef CONFIG_CIFS_NFSD_EXPORT diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 159547c8a40b..5d57c85703a9 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -213,8 +213,8 @@ set_ea_exit: return rc; } -ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, - void *ea_value, size_t buf_size) +ssize_t cifs_getxattr(struct dentry *direntry, struct inode *inode, + const char *ea_name, void *ea_value, size_t buf_size) { ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR @@ -296,7 +296,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, goto get_ea_exit; /* rc already EOPNOTSUPP */ pacl = pTcon->ses->server->ops->get_acl(cifs_sb, - d_inode(direntry), full_path, &acllen); + inode, full_path, &acllen); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 64026e53722a..543a146ee019 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1369,7 +1369,9 @@ int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode) ssize_t size; int rc = 0; - size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME, + size = ecryptfs_getxattr_lower(lower_dentry, + ecryptfs_inode_to_lower(ecryptfs_inode), + ECRYPTFS_XATTR_NAME, page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); if (size < 0) { if (unlikely(ecryptfs_verbosity > 0)) @@ -1391,6 +1393,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, int rc; rc = ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), + ecryptfs_inode_to_lower(inode), ECRYPTFS_XATTR_NAME, file_size, ECRYPTFS_SIZE_AND_MARKER_BYTES); if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index d123fbaa28e0..6ff907f73331 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -607,8 +607,8 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *src, struct dentry *ecryptfs_dentry); int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); ssize_t -ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, - void *value, size_t size); +ecryptfs_getxattr_lower(struct dentry *lower_dentry, struct inode *lower_inode, + const char *name, void *value, size_t size); int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 121114e9a464..1ac631cd9d84 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1033,29 +1033,30 @@ out: } ssize_t -ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, - void *value, size_t size) +ecryptfs_getxattr_lower(struct dentry *lower_dentry, struct inode *lower_inode, + const char *name, void *value, size_t size) { int rc = 0; - if (!d_inode(lower_dentry)->i_op->getxattr) { + if (!lower_inode->i_op->getxattr) { rc = -EOPNOTSUPP; goto out; } - inode_lock(d_inode(lower_dentry)); - rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value, - size); - inode_unlock(d_inode(lower_dentry)); + inode_lock(lower_inode); + rc = lower_inode->i_op->getxattr(lower_dentry, lower_inode, + name, value, size); + inode_unlock(lower_inode); out: return rc; } static ssize_t -ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size) +ecryptfs_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { - return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), name, - value, size); + return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), + ecryptfs_inode_to_lower(inode), + name, value, size); } static ssize_t diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 1f5865263b3e..39e4381d3a65 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -436,7 +436,8 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) goto out; } inode_lock(lower_inode); - size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME, + size = lower_inode->i_op->getxattr(lower_dentry, lower_inode, + ECRYPTFS_XATTR_NAME, xattr_virt, PAGE_CACHE_SIZE); if (size < 0) size = 8; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4b855b65d457..b618527c05c6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1759,10 +1759,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name, return err; } -static ssize_t fuse_getxattr(struct dentry *entry, const char *name, - void *value, size_t size) +static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode, + const char *name, void *value, size_t size) { - struct inode *inode = d_inode(entry); struct fuse_conn *fc = get_fuse_conn(inode); FUSE_ARGS(args); struct fuse_getxattr_in inarg; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index bb30f9a72c65..45f516cada78 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1968,22 +1968,21 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, return ret; } -static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, - void *data, size_t size) +static ssize_t gfs2_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *data, size_t size) { - struct inode *inode = d_inode(dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; /* For selinux during lookup */ if (gfs2_glock_is_locked_by_me(ip->i_gl)) - return generic_getxattr(dentry, name, data, size); + return generic_getxattr(dentry, inode, name, data, size); gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = generic_getxattr(dentry, name, data, size); + ret = generic_getxattr(dentry, inode, name, data, size); gfs2_glock_dq(&gh); } gfs2_holder_uninit(&gh); diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c index 8d931b157bbe..064f92f17efc 100644 --- a/fs/hfs/attr.c +++ b/fs/hfs/attr.c @@ -56,10 +56,9 @@ out: return res; } -ssize_t hfs_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) +ssize_t hfs_getxattr(struct dentry *unused, struct inode *inode, + const char *name, void *value, size_t size) { - struct inode *inode = d_inode(dentry); struct hfs_find_data fd; hfs_cat_rec rec; struct hfs_cat_file *file; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 1f1c7dcbcc2f..79daa097929a 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -213,8 +213,8 @@ extern void hfs_delete_inode(struct inode *); /* attr.c */ extern int hfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -extern ssize_t hfs_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size); +extern ssize_t hfs_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size); extern ssize_t hfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* mdb.c */ diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h index e8d717dabca3..e69e14f3777b 100644 --- a/fs/jfs/jfs_xattr.h +++ b/fs/jfs/jfs_xattr.h @@ -57,7 +57,7 @@ extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *, extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, int); extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); -extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t jfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); extern int jfs_removexattr(struct dentry *, const char *); diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 48b15a6e5558..5becc6a3ff8c 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -933,8 +933,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, return size; } -ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, - size_t buf_size) +ssize_t jfs_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *data, size_t buf_size) { int err; @@ -944,7 +944,7 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, * for it via sb->s_xattr. */ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return generic_getxattr(dentry, name, data, buf_size); + return generic_getxattr(dentry, inode, name, data, buf_size); if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { /* @@ -959,7 +959,7 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, return -EOPNOTSUPP; } - err = __jfs_getxattr(d_inode(dentry), name, data, buf_size); + err = __jfs_getxattr(inode, name, data, buf_size); return err; } diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 16405ae88d2d..b5247226732b 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -208,10 +208,10 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name) return simple_xattr_set(&attrs->xattrs, name, NULL, 0, XATTR_REPLACE); } -ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size) +ssize_t kernfs_iop_getxattr(struct dentry *unused, struct inode *inode, + const char *name, void *buf, size_t size) { - struct kernfs_node *kn = dentry->d_fsdata; + struct kernfs_node *kn = inode->i_private; struct kernfs_iattrs *attrs; attrs = kernfs_iattrs(kn); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 6762bfbd8207..45c9192c276e 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -84,8 +84,8 @@ int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, int kernfs_iop_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int kernfs_iop_removexattr(struct dentry *dentry, const char *name); -ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size); +ssize_t kernfs_iop_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buf, size_t size); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); /* diff --git a/fs/libfs.c b/fs/libfs.c index 0ca80b2af420..03332f4bdedf 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1127,8 +1127,8 @@ static int empty_dir_setxattr(struct dentry *dentry, const char *name, return -EOPNOTSUPP; } -static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) +static ssize_t empty_dir_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { return -EOPNOTSUPP; } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a4ff5d0d7db9..c7b31a03dc9c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -246,8 +246,8 @@ static bool ovl_need_xattr_filter(struct dentry *dentry, return false; } -ssize_t ovl_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) +ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { struct path realpath; enum ovl_path_type type = ovl_path_real(dentry, &realpath); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6a7090f4a441..99ec4b035237 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,8 +173,8 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode *inode, int mask); int ovl_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ovl_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size); +ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ef64984c9bbc..14cab381cece 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -274,7 +274,7 @@ static bool ovl_is_opaquedir(struct dentry *dentry) if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) return false; - res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + res = inode->i_op->getxattr(dentry, inode, OVL_XATTR_OPAQUE, &val, 1); if (res == 1 && val == 'y') return true; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index c2a57e193a81..536fb495f2f1 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1734,8 +1734,8 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, /* xattr.c */ int ubifs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size); +ssize_t ubifs_getxattr(struct dentry *dentry, struct inode *host, + const char *name, void *buf, size_t size); ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); int ubifs_removexattr(struct dentry *dentry, const char *name); int ubifs_init_security(struct inode *dentry, struct inode *inode, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index b043e044121d..413d650c9476 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -372,10 +372,10 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, return setxattr(d_inode(dentry), name, value, size, flags); } -ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size) +ssize_t ubifs_getxattr(struct dentry *dentry, struct inode *host, + const char *name, void *buf, size_t size) { - struct inode *inode, *host = d_inode(dentry); + struct inode *inode; struct ubifs_info *c = host->i_sb->s_fs_info; struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_inode *ui; diff --git a/fs/xattr.c b/fs/xattr.c index 461ba45b7da9..b11945e15fde 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -192,7 +192,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, if (!inode->i_op->getxattr) return -EOPNOTSUPP; - error = inode->i_op->getxattr(dentry, name, NULL, 0); + error = inode->i_op->getxattr(dentry, inode, name, NULL, 0); if (error < 0) return error; @@ -203,7 +203,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, memset(value, 0, error + 1); } - error = inode->i_op->getxattr(dentry, name, value, error); + error = inode->i_op->getxattr(dentry, inode, name, value, error); *xattr_value = value; return error; } @@ -236,7 +236,7 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) } nolsm: if (inode->i_op->getxattr) - error = inode->i_op->getxattr(dentry, name, value, size); + error = inode->i_op->getxattr(dentry, inode, name, value, size); else error = -EOPNOTSUPP; @@ -691,14 +691,15 @@ xattr_resolve_name(const struct xattr_handler **handlers, const char **name) * Find the handler for the prefix and dispatch its get() operation. */ ssize_t -generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) +generic_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { const struct xattr_handler *handler; handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (IS_ERR(handler)) return PTR_ERR(handler); - return handler->get(handler, dentry, d_inode(dentry), + return handler->get(handler, dentry, inode, name, buffer, size); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 329ed372d708..1b5fcaeea827 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1702,7 +1702,8 @@ struct inode_operations { int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*getxattr) (struct dentry *, struct inode *, + const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index c11c022298b9..1cc4c578deb9 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -52,7 +52,7 @@ int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, i int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); int vfs_removexattr(struct dentry *, const char *); -ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); +ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int generic_removexattr(struct dentry *dentry, const char *name); diff --git a/net/socket.c b/net/socket.c index 5f77a8e93830..35e4523edada 100644 --- a/net/socket.c +++ b/net/socket.c @@ -466,7 +466,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) #define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" #define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) #define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) -static ssize_t sockfs_getxattr(struct dentry *dentry, +static ssize_t sockfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { const char *proto_name; diff --git a/security/commoncap.c b/security/commoncap.c index 48071ed7c445..a042077312a5 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -313,7 +313,7 @@ int cap_inode_need_killpriv(struct dentry *dentry) if (!inode->i_op->getxattr) return 0; - error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); + error = inode->i_op->getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0); if (error <= 0) return 0; return 1; @@ -397,8 +397,8 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data if (!inode || !inode->i_op->getxattr) return -ENODATA; - size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps, - XATTR_CAPS_SZ); + size = inode->i_op->getxattr((struct dentry *)dentry, inode, + XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 84c6d11fc096..b9e26288d30c 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -82,7 +82,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry) return -EOPNOTSUPP; for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) { - error = inode->i_op->getxattr(dentry, *xattr, NULL, 0); + error = inode->i_op->getxattr(dentry, inode, *xattr, NULL, 0); if (error < 0) { if (error == -ENODATA) continue; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 889cd59ca5a7..469f5c75bd4b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -506,7 +506,8 @@ static int sb_finish_set_opts(struct super_block *sb) rc = -EOPNOTSUPP; goto out; } - rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0); + rc = root_inode->i_op->getxattr(root, root_inode, + XATTR_NAME_SELINUX, NULL, 0); if (rc < 0 && rc != -ENODATA) { if (rc == -EOPNOTSUPP) printk(KERN_WARNING "SELinux: (dev %s, type " @@ -1412,13 +1413,13 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent goto out_unlock; } context[len] = '\0'; - rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, + rc = inode->i_op->getxattr(dentry, inode, XATTR_NAME_SELINUX, context, len); if (rc == -ERANGE) { kfree(context); /* Need a larger buffer. Query for the right size. */ - rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, + rc = inode->i_op->getxattr(dentry, inode, XATTR_NAME_SELINUX, NULL, 0); if (rc < 0) { dput(dentry); @@ -1432,7 +1433,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent goto out_unlock; } context[len] = '\0'; - rc = inode->i_op->getxattr(dentry, + rc = inode->i_op->getxattr(dentry, inode, XATTR_NAME_SELINUX, context, len); } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 50bcca26c0b7..ff2b8c3cf7a9 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -272,7 +272,7 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip, if (buffer == NULL) return ERR_PTR(-ENOMEM); - rc = ip->i_op->getxattr(dp, name, buffer, SMK_LONGLABEL); + rc = ip->i_op->getxattr(dp, ip, name, buffer, SMK_LONGLABEL); if (rc < 0) skp = ERR_PTR(rc); else if (rc == 0) @@ -3519,7 +3519,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) TRANS_TRUE, TRANS_TRUE_SIZE, 0); } else { - rc = inode->i_op->getxattr(dp, + rc = inode->i_op->getxattr(dp, inode, XATTR_NAME_SMACKTRANSMUTE, trattr, TRANS_TRUE_SIZE); if (rc >= 0 && strncmp(trattr, TRANS_TRUE, -- cgit v1.2.3 From adae28c59a6a71a971ffed713550911546df0e20 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Mar 2016 11:36:53 +0100 Subject: mfd: syscon: Include errno.h from header The syscon header uses the ENOTSUPP error constant, but doesn't include the header that defines it. This causes a build error after the imx pinctrl driver started using syscon: include/linux/mfd/syscon.h: In function 'syscon_node_to_regmap': include/linux/mfd/syscon.h:32:18: error: 'ENOTSUPP' undeclared (first use in this function) return ERR_PTR(-ENOTSUPP); ^~~~~~~~ This adds the missing #include. Signed-off-by: Arnd Bergmann Fixes: 8626ada871f1 ("pinctrl: imx: attach iomuxc device to gpr syscon") Signed-off-by: Lee Jones --- include/linux/mfd/syscon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index 1088149be0c9..40a76b97b7ab 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -16,6 +16,7 @@ #define __LINUX_MFD_SYSCON_H__ #include +#include struct device_node; -- cgit v1.2.3 From 40a865500c4d408b552cf5899bf091ac72e32bf8 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 7 Apr 2016 16:22:38 +0200 Subject: regulator: as3722: Add bypass support for LDO6 LD06 on the AS3722 power management IC supports a bypass mode. Bypass is enabled for the LDO by writing the value 0x3F to the voltage select field in the control register for the LDO. Note that this is the same register and field that is used to select the voltage as well for the LDO. Add support for bypass on LDO6 by specifying the various bypass parameters for regulator and adding new function pointer tables for the LDO. Note that the bypass OFF value is the same as the ON value simply because there is no actual OFF value and bypass will be disabled when a new voltage is written to the VSEL field. Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Mark Brown --- drivers/regulator/as3722-regulator.c | 43 ++++++++++++++++++++++++++++++++++++ include/linux/mfd/as3722.h | 1 + 2 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/as3722-regulator.c b/drivers/regulator/as3722-regulator.c index 8b046eec6ae0..35a7c647f6a9 100644 --- a/drivers/regulator/as3722-regulator.c +++ b/drivers/regulator/as3722-regulator.c @@ -432,6 +432,31 @@ static struct regulator_ops as3722_ldo3_extcntrl_ops = { .get_current_limit = as3722_ldo3_get_current_limit, }; +static struct regulator_ops as3722_ldo6_ops = { + .is_enabled = regulator_is_enabled_regmap, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .list_voltage = regulator_list_voltage_linear_range, + .get_current_limit = as3722_ldo_get_current_limit, + .set_current_limit = as3722_ldo_set_current_limit, + .get_bypass = regulator_get_bypass_regmap, + .set_bypass = regulator_set_bypass_regmap, +}; + +static struct regulator_ops as3722_ldo6_extcntrl_ops = { + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .list_voltage = regulator_list_voltage_linear_range, + .get_current_limit = as3722_ldo_get_current_limit, + .set_current_limit = as3722_ldo_set_current_limit, + .get_bypass = regulator_get_bypass_regmap, + .set_bypass = regulator_set_bypass_regmap, +}; + static const struct regulator_linear_range as3722_ldo_ranges[] = { REGULATOR_LINEAR_RANGE(0, 0x00, 0x00, 0), REGULATOR_LINEAR_RANGE(825000, 0x01, 0x24, 25000), @@ -829,6 +854,24 @@ static int as3722_regulator_probe(struct platform_device *pdev) } } break; + case AS3722_REGULATOR_ID_LDO6: + if (reg_config->ext_control) + ops = &as3722_ldo6_extcntrl_ops; + else + ops = &as3722_ldo6_ops; + as3722_regs->desc[id].enable_time = 500; + as3722_regs->desc[id].bypass_reg = + AS3722_LDO6_VOLTAGE_REG; + as3722_regs->desc[id].bypass_mask = + AS3722_LDO_VSEL_MASK; + as3722_regs->desc[id].bypass_val_on = + AS3722_LDO6_VSEL_BYPASS; + as3722_regs->desc[id].bypass_val_off = + AS3722_LDO6_VSEL_BYPASS; + as3722_regs->desc[id].linear_ranges = as3722_ldo_ranges; + as3722_regs->desc[id].n_linear_ranges = + ARRAY_SIZE(as3722_ldo_ranges); + break; case AS3722_REGULATOR_ID_SD0: case AS3722_REGULATOR_ID_SD1: case AS3722_REGULATOR_ID_SD6: diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h index 8d43e9f2a842..51e6f9414575 100644 --- a/include/linux/mfd/as3722.h +++ b/include/linux/mfd/as3722.h @@ -196,6 +196,7 @@ #define AS3722_LDO3_VSEL_MIN 0x01 #define AS3722_LDO3_VSEL_MAX 0x2D #define AS3722_LDO3_NUM_VOLT 0x2D +#define AS3722_LDO6_VSEL_BYPASS 0x3F #define AS3722_LDO_VSEL_MASK 0x7F #define AS3722_LDO_VSEL_MIN 0x01 #define AS3722_LDO_VSEL_MAX 0x7F -- cgit v1.2.3 From a16d6ebca6efb73f6402f36e5aebf84f61721856 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 3 Apr 2016 20:44:45 +0200 Subject: i2c: introduce helper function to get 8 bit address from a message Drivers do this in various ways, let's use one standard way of doing it. Note: I2C_M_RD is bit 0, so the code could be simplified. To be extremly robust and to advertise good coding practices, I still use the ternary operator and let the compilers do the optimizing job. Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 200cf13b00f6..c30833b7b073 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -654,6 +654,11 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap) return adap->nr; } +static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) +{ + return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); +} + /** * module_i2c_driver() - Helper macro for registering a modular I2C driver * @__i2c_driver: i2c_driver struct -- cgit v1.2.3 From f0af9593372abfde34460aa1250e670cc535a7d8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 24 Feb 2016 13:43:45 -0600 Subject: PCI: Add pci_add_dma_alias() to abstract implementation Add a pci_add_dma_alias() interface to encapsulate the details of adding an alias. No functional change intended. Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson --- drivers/pci/pci.c | 14 ++++++++++++++ drivers/pci/quirks.c | 19 +++++++------------ include/linux/pci.h | 1 + 3 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 25e0327d4429..1162118d1093 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4578,6 +4578,20 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, return 0; } +/** + * pci_add_dma_alias - Add a DMA devfn alias for a device + * @dev: the PCI device for which alias is added + * @devfn: alias slot and function + * + * This helper encodes 8-bit devfn as bit number in dma_alias_mask. + * It should be called early, preferably as PCI fixup header quirk. + */ +void pci_add_dma_alias(struct pci_dev *dev, u8 devfn) +{ + dev->dma_alias_devfn = devfn; + dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; +} + bool pci_device_is_present(struct pci_dev *pdev) { u32 v; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8e678027b900..e45a7a8338bb 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3610,10 +3610,8 @@ int pci_dev_specific_reset(struct pci_dev *dev, int probe) static void quirk_dma_func0_alias(struct pci_dev *dev) { - if (PCI_FUNC(dev->devfn) != 0) { - dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); - dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; - } + if (PCI_FUNC(dev->devfn) != 0) + pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); } /* @@ -3626,10 +3624,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias); static void quirk_dma_func1_alias(struct pci_dev *dev) { - if (PCI_FUNC(dev->devfn) != 1) { - dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 1); - dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; - } + if (PCI_FUNC(dev->devfn) != 1) + pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 1)); } /* @@ -3696,11 +3692,10 @@ static void quirk_fixed_dma_alias(struct pci_dev *dev) id = pci_match_id(fixed_dma_alias_tbl, dev); if (id) { - dev->dma_alias_devfn = id->driver_data; - dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pci_add_dma_alias(dev, id->driver_data); dev_info(&dev->dev, "Enabling fixed DMA alias to %02x.%d\n", - PCI_SLOT(dev->dma_alias_devfn), - PCI_FUNC(dev->dma_alias_devfn)); + PCI_SLOT(id->driver_data), + PCI_FUNC(id->driver_data)); } } diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..7e7019064437 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1988,6 +1988,7 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) } #endif +void pci_add_dma_alias(struct pci_dev *dev, u8 devfn); int pci_for_each_dma_alias(struct pci_dev *pdev, int (*fn)(struct pci_dev *pdev, u16 alias, void *data), void *data); -- cgit v1.2.3 From 338c3149a221527e202ee26b1e35f76c965bb6c0 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Thu, 3 Mar 2016 15:38:02 +0100 Subject: PCI: Add support for multiple DMA aliases Solve IOMMU support issues with PCIe non-transparent bridges that use Requester ID look-up tables (RID-LUT), e.g., the PEX8733. The NTB connects devices in two independent PCI domains. Devices separated by the NTB are not able to discover each other. A PCI packet being forwared from one domain to another has to have its RID modified so it appears on correct bus and completions are forwarded back to the original domain through the NTB. The RID is translated using a preprogrammed table (LUT) and the PCI packet propagates upstream away from the NTB. If the destination system has IOMMU enabled, the packet will be discarded because the new RID is unknown to the IOMMU. Adding a DMA alias for the new RID allows IOMMU to properly recognize the packet. Each device behind the NTB has a unique RID assigned in the RID-LUT. The current DMA alias implementation supports only a single alias, so it's not possible to support mutiple devices behind the NTB when IOMMU is enabled. Enable all possible aliases on a given bus (256) that are stored in a bitset. Alias devfn is directly translated to a bit number. The bitset is not allocated for devices that have no need for DMA aliases. More details can be found in the following article: http://www.plxtech.com/files/pdf/technical/expresslane/RTC_Enabling%20MulitHostSystemDesigns.pdf Signed-off-by: Jacek Lawrynowicz Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson Acked-by: David Woodhouse Acked-by: Joerg Roedel --- drivers/iommu/iommu.c | 10 +++------- drivers/pci/pci.c | 19 +++++++++++++++++-- drivers/pci/probe.c | 1 + drivers/pci/search.c | 14 +++++++++----- include/linux/pci.h | 5 ++--- 5 files changed, 32 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bfd4f7c3b1d8..1b49e940a318 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -660,8 +660,8 @@ static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, } /* - * Look for aliases to or from the given device for exisiting groups. The - * dma_alias_devfn only supports aliases on the same bus, therefore the search + * Look for aliases to or from the given device for existing groups. DMA + * aliases are only supported on the same bus, therefore the search * space is quite small (especially since we're really only looking at pcie * device, and therefore only expect multiple slots on the root complex or * downstream switch ports). It's conceivable though that a pair of @@ -686,11 +686,7 @@ static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, continue; /* We alias them or they alias us */ - if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && - pdev->dma_alias_devfn == tmp->devfn) || - ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && - tmp->dma_alias_devfn == pdev->devfn)) { - + if (pci_devs_are_dma_aliases(pdev, tmp)) { group = get_pci_alias_group(tmp, devfns); if (group) { pci_dev_put(tmp); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c82ebd0f6982..0b90c2186f1c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4588,12 +4588,27 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, */ void pci_add_dma_alias(struct pci_dev *dev, u8 devfn) { - dev->dma_alias_devfn = devfn; - dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + if (!dev->dma_alias_mask) + dev->dma_alias_mask = kcalloc(BITS_TO_LONGS(U8_MAX), + sizeof(long), GFP_KERNEL); + if (!dev->dma_alias_mask) { + dev_warn(&dev->dev, "Unable to allocate DMA alias mask\n"); + return; + } + + set_bit(devfn, dev->dma_alias_mask); dev_info(&dev->dev, "Enabling fixed DMA alias to %02x.%d\n", PCI_SLOT(devfn), PCI_FUNC(devfn)); } +bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2) +{ + return (dev1->dma_alias_mask && + test_bit(dev2->devfn, dev1->dma_alias_mask)) || + (dev2->dma_alias_mask && + test_bit(dev1->devfn, dev2->dma_alias_mask)); +} + bool pci_device_is_present(struct pci_dev *pdev) { u32 v; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8004f67c57ec..ae7daeb83e21 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1537,6 +1537,7 @@ static void pci_release_dev(struct device *dev) pcibios_release_device(pci_dev); pci_bus_put(pci_dev->bus); kfree(pci_dev->driver_override); + kfree(pci_dev->dma_alias_mask); kfree(pci_dev); } diff --git a/drivers/pci/search.c b/drivers/pci/search.c index a20ce7d5e2a7..33e0f033a48e 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -40,11 +40,15 @@ int pci_for_each_dma_alias(struct pci_dev *pdev, * If the device is broken and uses an alias requester ID for * DMA, iterate over that too. */ - if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) { - ret = fn(pdev, PCI_DEVID(pdev->bus->number, - pdev->dma_alias_devfn), data); - if (ret) - return ret; + if (unlikely(pdev->dma_alias_mask)) { + u8 devfn; + + for_each_set_bit(devfn, pdev->dma_alias_mask, U8_MAX) { + ret = fn(pdev, PCI_DEVID(pdev->bus->number, devfn), + data); + if (ret) + return ret; + } } for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 7e7019064437..5581d05f7833 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -166,8 +166,6 @@ enum pci_dev_flags { PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2), /* Flag for quirk use to store if quirk-specific ACS is enabled */ PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3), - /* Flag to indicate the device uses dma_alias_devfn */ - PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4), /* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */ PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5), /* Do not use bus resets for device */ @@ -273,7 +271,7 @@ struct pci_dev { u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ - u8 dma_alias_devfn;/* devfn of DMA alias, if any */ + unsigned long *dma_alias_mask;/* mask of enabled devfn aliases */ struct pci_driver *driver; /* which driver has allocated this device */ u64 dma_mask; /* Mask of the bits of bus address this @@ -1989,6 +1987,7 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) #endif void pci_add_dma_alias(struct pci_dev *dev, u8 devfn); +bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2); int pci_for_each_dma_alias(struct pci_dev *pdev, int (*fn)(struct pci_dev *pdev, u16 alias, void *data), void *data); -- cgit v1.2.3 From 5ac7eace2d00eab5ae0e9fdee63e38aee6001f7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:24 +0100 Subject: KEYS: Add a facility to restrict new links into a keyring Add a facility whereby proposed new links to be added to a keyring can be vetted, permitting them to be rejected if necessary. This can be used to block public keys from which the signature cannot be verified or for which the signature verification fails. It could also be used to provide blacklisting. This affects operations like add_key(), KEYCTL_LINK and KEYCTL_INSTANTIATE. To this end: (1) A function pointer is added to the key struct that, if set, points to the vetting function. This is called as: int (*restrict_link)(struct key *keyring, const struct key_type *key_type, unsigned long key_flags, const union key_payload *key_payload), where 'keyring' will be the keyring being added to, key_type and key_payload will describe the key being added and key_flags[*] can be AND'ed with KEY_FLAG_TRUSTED. [*] This parameter will be removed in a later patch when KEY_FLAG_TRUSTED is removed. The function should return 0 to allow the link to take place or an error (typically -ENOKEY, -ENOPKG or -EKEYREJECTED) to reject the link. The pointer should not be set directly, but rather should be set through keyring_alloc(). Note that if called during add_key(), preparse is called before this method, but a key isn't actually allocated until after this function is called. (2) KEY_ALLOC_BYPASS_RESTRICTION is added. This can be passed to key_create_or_update() or key_instantiate_and_link() to bypass the restriction check. (3) KEY_FLAG_TRUSTED_ONLY is removed. The entire contents of a keyring with this restriction emplaced can be considered 'trustworthy' by virtue of being in the keyring when that keyring is consulted. (4) key_alloc() and keyring_alloc() take an extra argument that will be used to set restrict_link in the new key. This ensures that the pointer is set before the key is published, thus preventing a window of unrestrictedness. Normally this argument will be NULL. (5) As a temporary affair, keyring_restrict_trusted_only() is added. It should be passed to keyring_alloc() as the extra argument instead of setting KEY_FLAG_TRUSTED_ONLY on a keyring. This will be replaced in a later patch with functions that look in the appropriate places for authoritative keys. Signed-off-by: David Howells Reviewed-by: Mimi Zohar --- Documentation/security/keys.txt | 22 ++++++++++++ certs/system_keyring.c | 8 ++--- fs/cifs/cifsacl.c | 2 +- fs/nfs/nfs4idmap.c | 2 +- include/linux/key.h | 53 +++++++++++++++++++++++------ net/dns_resolver/dns_key.c | 2 +- net/rxrpc/ar-key.c | 4 +-- security/integrity/digsig.c | 7 ++-- security/integrity/ima/ima_mok.c | 8 ++--- security/keys/key.c | 43 +++++++++++++++++++---- security/keys/keyring.c | 73 ++++++++++++++++++++++++++++++++++++---- security/keys/persistent.c | 4 +-- security/keys/process_keys.c | 16 +++++---- security/keys/request_key.c | 4 +-- security/keys/request_key_auth.c | 2 +- 15 files changed, 198 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 8c183873b2b7..a6a50b359025 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -999,6 +999,10 @@ payload contents" for more information. struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, const struct cred *cred, key_perm_t perm, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *), unsigned long flags, struct key *dest); @@ -1010,6 +1014,24 @@ payload contents" for more information. KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted towards the user's quota). Error ENOMEM can also be returned. + If restrict_link not NULL, it should point to a function that will be + called each time an attempt is made to link a key into the new keyring. + This function is called to check whether a key may be added into the keying + or not. Callers of key_create_or_update() within the kernel can pass + KEY_ALLOC_BYPASS_RESTRICTION to suppress the check. An example of using + this is to manage rings of cryptographic keys that are set up when the + kernel boots where userspace is also permitted to add keys - provided they + can be verified by a key the kernel already has. + + When called, the restriction function will be passed the keyring being + added to, the key flags value and the type and payload of the key being + added. Note that when a new key is being created, this is called between + payload preparsing and actual key creation. The function should return 0 + to allow the link or an error to reject it. + + A convenience function, restrict_link_reject, exists to always return + -EPERM to in this case. + (*) To check the validity of a key, this function can be called: diff --git a/certs/system_keyring.c b/certs/system_keyring.c index dc18869ff680..417d65882870 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -36,11 +36,10 @@ static __init int system_trusted_keyring_init(void) KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH), - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, + keyring_restrict_trusted_only, NULL); if (IS_ERR(system_trusted_keyring)) panic("Can't allocate system trusted keyring\n"); - - set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags); return 0; } @@ -85,7 +84,8 @@ static __init int load_system_certificate_list(void) KEY_USR_VIEW | KEY_USR_READ), KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_TRUSTED | - KEY_ALLOC_BUILT_IN); + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); if (IS_ERR(key)) { pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", PTR_ERR(key)); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 3f93125916bf..71e8a56e9479 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -360,7 +360,7 @@ init_cifs_idmap(void) GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 5ba22c6b0ffa..c444285bb1b1 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -201,7 +201,7 @@ int nfs_idmap_init(void) GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; diff --git a/include/linux/key.h b/include/linux/key.h index 5f5b1129dc92..83b603639d2e 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -174,10 +174,9 @@ struct key { #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ #define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ -#define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ -#define KEY_FLAG_BUILTIN 10 /* set if key is builtin */ -#define KEY_FLAG_ROOT_CAN_INVAL 11 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 12 /* set if key should not be removed */ +#define KEY_FLAG_BUILTIN 9 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 10 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 11 /* set if key should not be removed */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -205,6 +204,21 @@ struct key { }; int reject_error; }; + + /* This is set on a keyring to restrict the addition of a link to a key + * to it. If this method isn't provided then it is assumed that the + * keyring is open to any addition. It is ignored for non-keyring + * keys. + * + * This is intended for use with rings of trusted keys whereby addition + * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION + * overrides this, allowing the kernel to add extra keys without + * restriction. + */ + int (*restrict_link)(struct key *keyring, + const struct key_type *type, + unsigned long flags, + const union key_payload *payload); }; extern struct key *key_alloc(struct key_type *type, @@ -212,14 +226,19 @@ extern struct key *key_alloc(struct key_type *type, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, - unsigned long flags); + unsigned long flags, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *)); -#define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ -#define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ -#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ -#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ -#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ +#define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ +#define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ +#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ +#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ +#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ +#define KEY_ALLOC_BYPASS_RESTRICTION 0x0010 /* Override the check on restricted keyrings */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); @@ -288,8 +307,22 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid const struct cred *cred, key_perm_t perm, unsigned long flags, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *), struct key *dest); +extern int keyring_restrict_trusted_only(struct key *keyring, + const struct key_type *type, + unsigned long, + const union key_payload *payload); + +extern int restrict_link_reject(struct key *keyring, + const struct key_type *type, + unsigned long flags, + const union key_payload *payload); + extern int keyring_clear(struct key *keyring); extern key_ref_t keyring_search(key_ref_t keyring, diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c79b85eb4d4c..8737412c7b27 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -281,7 +281,7 @@ static int __init init_dns_resolver(void) GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 3fb492eedeb9..1021b4c0bdd2 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -965,7 +965,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, key = key_alloc(&key_type_rxrpc, "x", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0, - KEY_ALLOC_NOT_IN_QUOTA); + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); return -ENOMEM; @@ -1012,7 +1012,7 @@ struct key *rxrpc_get_null_key(const char *keyname) key = key_alloc(&key_type_rxrpc, keyname, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, - KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); + KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(key)) return key; diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 8ef15118cc78..659566c2200b 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -83,10 +83,9 @@ int __init integrity_init_keyring(const unsigned int id) ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH), - KEY_ALLOC_NOT_IN_QUOTA, NULL); - if (!IS_ERR(keyring[id])) - set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags); - else { + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(keyring[id])) { err = PTR_ERR(keyring[id]); pr_info("Can't allocate %s keyring (%d)\n", keyring_name[id], err); diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c index 676885e4320e..ef91248cb934 100644 --- a/security/integrity/ima/ima_mok.c +++ b/security/integrity/ima/ima_mok.c @@ -35,20 +35,20 @@ __init int ima_mok_init(void) (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH, - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, + keyring_restrict_trusted_only, NULL); ima_blacklist_keyring = keyring_alloc(".ima_blacklist", KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH, - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, + keyring_restrict_trusted_only, NULL); if (IS_ERR(ima_mok_keyring) || IS_ERR(ima_blacklist_keyring)) panic("Can't allocate IMA MOK or blacklist keyrings."); - set_bit(KEY_FLAG_TRUSTED_ONLY, &ima_mok_keyring->flags); - set_bit(KEY_FLAG_TRUSTED_ONLY, &ima_blacklist_keyring->flags); set_bit(KEY_FLAG_KEEP, &ima_blacklist_keyring->flags); return 0; } diff --git a/security/keys/key.c b/security/keys/key.c index b28755131687..deb881754e03 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -201,6 +201,7 @@ serial_exists: * @cred: The credentials specifying UID namespace. * @perm: The permissions mask of the new key. * @flags: Flags specifying quota properties. + * @restrict_link: Optional link restriction method for new keyrings. * * Allocate a key of the specified type with the attributes given. The key is * returned in an uninstantiated state and the caller needs to instantiate the @@ -223,7 +224,11 @@ serial_exists: */ struct key *key_alloc(struct key_type *type, const char *desc, kuid_t uid, kgid_t gid, const struct cred *cred, - key_perm_t perm, unsigned long flags) + key_perm_t perm, unsigned long flags, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *)) { struct key_user *user = NULL; struct key *key; @@ -291,6 +296,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->uid = uid; key->gid = gid; key->perm = perm; + key->restrict_link = restrict_link; if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; @@ -496,6 +502,12 @@ int key_instantiate_and_link(struct key *key, } if (keyring) { + if (keyring->restrict_link) { + ret = keyring->restrict_link(keyring, key->type, + key->flags, &prep.payload); + if (ret < 0) + goto error; + } ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret < 0) goto error; @@ -551,8 +563,12 @@ int key_reject_and_link(struct key *key, awaken = 0; ret = -EBUSY; - if (keyring) + if (keyring) { + if (keyring->restrict_link) + return -EPERM; + link_ret = __key_link_begin(keyring, &key->index_key, &edit); + } mutex_lock(&key_construction_mutex); @@ -793,6 +809,10 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, struct key *keyring, *key = NULL; key_ref_t key_ref; int ret; + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *) = NULL; /* look up the key type to see if it's one of the registered kernel * types */ @@ -811,6 +831,10 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_check(keyring); + key_ref = ERR_PTR(-EPERM); + if (!(flags & KEY_ALLOC_BYPASS_RESTRICTION)) + restrict_link = keyring->restrict_link; + key_ref = ERR_PTR(-ENOTDIR); if (keyring->type != &key_type_keyring) goto error_put_type; @@ -835,10 +859,15 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } index_key.desc_len = strlen(index_key.description); - key_ref = ERR_PTR(-EPERM); - if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags)) - goto error_free_prep; - flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0; + if (restrict_link) { + unsigned long kflags = prep.trusted ? KEY_FLAG_TRUSTED : 0; + ret = restrict_link(keyring, + index_key.type, kflags, &prep.payload); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_free_prep; + } + } ret = __key_link_begin(keyring, &index_key, &edit); if (ret < 0) { @@ -879,7 +908,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, /* allocate a new key */ key = key_alloc(index_key.type, index_key.description, - cred->fsuid, cred->fsgid, cred, perm, flags); + cred->fsuid, cred->fsgid, cred, perm, flags, NULL); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error_link_end; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index f931ccfeefb0..d2d1f3378008 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -491,13 +491,18 @@ static long keyring_read(const struct key *keyring, */ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, - unsigned long flags, struct key *dest) + unsigned long flags, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *), + struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, cred, perm, flags); + uid, gid, cred, perm, flags, restrict_link); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { @@ -510,6 +515,51 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, } EXPORT_SYMBOL(keyring_alloc); +/** + * keyring_restrict_trusted_only - Restrict additions to a keyring to trusted keys only + * @keyring: The keyring being added to. + * @type: The type of key being added. + * @flags: The key flags. + * @payload: The payload of the key intended to be added. + * + * Reject the addition of any links to a keyring that point to keys that aren't + * marked as being trusted. It can be overridden by passing + * KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when adding a key + * to a keyring. + * + * This is meant to be passed as the restrict_link parameter to + * keyring_alloc(). + */ +int keyring_restrict_trusted_only(struct key *keyring, + const struct key_type *type, + unsigned long flags, + const union key_payload *payload) +{ + return flags & KEY_FLAG_TRUSTED ? 0 : -EPERM; +} + +/** + * restrict_link_reject - Give -EPERM to restrict link + * @keyring: The keyring being added to. + * @type: The type of key being added. + * @flags: The key flags. + * @payload: The payload of the key intended to be added. + * + * Reject the addition of any links to a keyring. It can be overridden by + * passing KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when + * adding a key to a keyring. + * + * This is meant to be passed as the restrict_link parameter to + * keyring_alloc(). + */ +int restrict_link_reject(struct key *keyring, + const struct key_type *type, + unsigned long flags, + const union key_payload *payload) +{ + return -EPERM; +} + /* * By default, we keys found by getting an exact match on their descriptions. */ @@ -1191,6 +1241,17 @@ void __key_link_end(struct key *keyring, up_write(&keyring->sem); } +/* + * Check addition of keys to restricted keyrings. + */ +static int __key_link_check_restriction(struct key *keyring, struct key *key) +{ + if (!keyring->restrict_link) + return 0; + return keyring->restrict_link(keyring, + key->type, key->flags, &key->payload); +} + /** * key_link - Link a key to a keyring * @keyring: The keyring to make the link in. @@ -1221,14 +1282,12 @@ int key_link(struct key *keyring, struct key *key) key_check(keyring); key_check(key); - if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) && - !test_bit(KEY_FLAG_TRUSTED, &key->flags)) - return -EPERM; - ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret == 0) { kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage)); - ret = __key_link_check_live_key(keyring, key); + ret = __key_link_check_restriction(keyring, key); + if (ret == 0) + ret = __key_link_check_live_key(keyring, key); if (ret == 0) __key_link(key, &edit); __key_link_end(keyring, &key->index_key, edit); diff --git a/security/keys/persistent.c b/security/keys/persistent.c index c9fae5ea89fe..2ef45b319dd9 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -26,7 +26,7 @@ static int key_create_persistent_register(struct user_namespace *ns) current_cred(), ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA, NULL); + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(reg)) return PTR_ERR(reg); @@ -60,7 +60,7 @@ static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid, uid, INVALID_GID, current_cred(), ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA, + KEY_ALLOC_NOT_IN_QUOTA, NULL, ns->persistent_keyring_register); if (IS_ERR(persistent)) return ERR_CAST(persistent); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index e6d50172872f..40a885239782 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -76,7 +76,8 @@ int install_user_keyrings(void) if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, cred, user_keyring_perm, - KEY_ALLOC_IN_QUOTA, NULL); + KEY_ALLOC_IN_QUOTA, + NULL, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; @@ -92,7 +93,8 @@ int install_user_keyrings(void) session_keyring = keyring_alloc(buf, user->uid, INVALID_GID, cred, user_keyring_perm, - KEY_ALLOC_IN_QUOTA, NULL); + KEY_ALLOC_IN_QUOTA, + NULL, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -134,7 +136,8 @@ int install_thread_keyring_to_cred(struct cred *new) keyring = keyring_alloc("_tid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, - KEY_ALLOC_QUOTA_OVERRUN, NULL); + KEY_ALLOC_QUOTA_OVERRUN, + NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -180,7 +183,8 @@ int install_process_keyring_to_cred(struct cred *new) keyring = keyring_alloc("_pid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, - KEY_ALLOC_QUOTA_OVERRUN, NULL); + KEY_ALLOC_QUOTA_OVERRUN, + NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -231,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, - flags, NULL); + flags, NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { @@ -785,7 +789,7 @@ long join_session_keyring(const char *name) keyring = keyring_alloc( name, old->uid, old->gid, old, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK, - KEY_ALLOC_IN_QUOTA, NULL); + KEY_ALLOC_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index c7a117c9a8f3..a29e3554751e 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -116,7 +116,7 @@ static int call_sbin_request_key(struct key_construction *cons, cred = get_current_cred(); keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_QUOTA_OVERRUN, NULL); + KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL); put_cred(cred); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -355,7 +355,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, key = key_alloc(ctx->index_key.type, ctx->index_key.description, ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred, - perm, flags); + perm, flags, NULL); if (IS_ERR(key)) goto alloc_failed; diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 4f0f112fe276..9db8b4a82787 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -202,7 +202,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, authkey = key_alloc(&key_type_request_key_auth, desc, cred->fsuid, cred->fsgid, cred, KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | - KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA); + KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(authkey)) { ret = PTR_ERR(authkey); goto error_alloc; -- cgit v1.2.3 From 77f68bac9481ad440f4f34dda3d28c2dce6eb87b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:26 +0100 Subject: KEYS: Remove KEY_FLAG_TRUSTED and KEY_ALLOC_TRUSTED Remove KEY_FLAG_TRUSTED and KEY_ALLOC_TRUSTED as they're no longer meaningful. Also we can drop the trusted flag from the preparse structure. Given this, we no longer need to pass the key flags through to restrict_link(). Further, we can now get rid of keyring_restrict_trusted_only() also. Signed-off-by: David Howells --- certs/system_keyring.c | 2 -- include/keys/system_keyring.h | 1 - include/linux/key-type.h | 1 - include/linux/key.h | 21 +++++---------------- security/integrity/digsig.c | 3 +-- security/keys/key.c | 11 ++--------- security/keys/keyring.c | 29 +---------------------------- 7 files changed, 9 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 4e2fa8ab01d6..e460d00a7781 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -31,7 +31,6 @@ extern __initconst const unsigned long system_certificate_list_size; */ int restrict_link_by_builtin_trusted(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload) { return restrict_link_by_signature(system_trusted_keyring, @@ -97,7 +96,6 @@ static __init int load_system_certificate_list(void) ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ), KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_TRUSTED | KEY_ALLOC_BUILT_IN | KEY_ALLOC_BYPASS_RESTRICTION); if (IS_ERR(key)) { diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 93715913a0b1..c72330ae76df 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -18,7 +18,6 @@ extern int restrict_link_by_builtin_trusted(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload); #else diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 7463355a198b..eaee981c5558 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,7 +45,6 @@ struct key_preparsed_payload { size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ time_t expiry; /* Expiry time of key */ - bool trusted; /* True if key is trusted */ }; typedef int (*request_key_actor_t)(struct key_construction *key, diff --git a/include/linux/key.h b/include/linux/key.h index 83b603639d2e..722914798f37 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -173,10 +173,9 @@ struct key { #define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ -#define KEY_FLAG_BUILTIN 9 /* set if key is built in to the kernel */ -#define KEY_FLAG_ROOT_CAN_INVAL 10 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 11 /* set if key should not be removed */ +#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -217,7 +216,6 @@ struct key { */ int (*restrict_link)(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload); }; @@ -229,16 +227,14 @@ extern struct key *key_alloc(struct key_type *type, unsigned long flags, int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *)); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ -#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ -#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ -#define KEY_ALLOC_BYPASS_RESTRICTION 0x0010 /* Override the check on restricted keyrings */ +#define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ +#define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); @@ -309,18 +305,11 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid unsigned long flags, int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *), struct key *dest); -extern int keyring_restrict_trusted_only(struct key *keyring, - const struct key_type *type, - unsigned long, - const union key_payload *payload); - extern int restrict_link_reject(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload); extern int keyring_clear(struct key *keyring); diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index d647178c6bbd..98ee4c752cf5 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -51,12 +51,11 @@ static bool init_keyring __initdata; */ static int restrict_link_by_ima_mok(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload) { int ret; - ret = restrict_link_by_builtin_trusted(keyring, type, flags, payload); + ret = restrict_link_by_builtin_trusted(keyring, type, payload); if (ret != -ENOKEY) return ret; diff --git a/security/keys/key.c b/security/keys/key.c index deb881754e03..bd5a272f28a6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -227,7 +227,6 @@ struct key *key_alloc(struct key_type *type, const char *desc, key_perm_t perm, unsigned long flags, int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *)) { struct key_user *user = NULL; @@ -300,8 +299,6 @@ struct key *key_alloc(struct key_type *type, const char *desc, if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; - if (flags & KEY_ALLOC_TRUSTED) - key->flags |= 1 << KEY_FLAG_TRUSTED; if (flags & KEY_ALLOC_BUILT_IN) key->flags |= 1 << KEY_FLAG_BUILTIN; @@ -504,7 +501,7 @@ int key_instantiate_and_link(struct key *key, if (keyring) { if (keyring->restrict_link) { ret = keyring->restrict_link(keyring, key->type, - key->flags, &prep.payload); + &prep.payload); if (ret < 0) goto error; } @@ -811,7 +808,6 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, int ret; int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *) = NULL; /* look up the key type to see if it's one of the registered kernel @@ -843,7 +839,6 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, prep.data = payload; prep.datalen = plen; prep.quotalen = index_key.type->def_datalen; - prep.trusted = flags & KEY_ALLOC_TRUSTED; prep.expiry = TIME_T_MAX; if (index_key.type->preparse) { ret = index_key.type->preparse(&prep); @@ -860,9 +855,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, index_key.desc_len = strlen(index_key.description); if (restrict_link) { - unsigned long kflags = prep.trusted ? KEY_FLAG_TRUSTED : 0; - ret = restrict_link(keyring, - index_key.type, kflags, &prep.payload); + ret = restrict_link(keyring, index_key.type, &prep.payload); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_free_prep; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d2d1f3378008..c91e4e0cea08 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -494,7 +494,6 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, unsigned long flags, int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *), struct key *dest) { @@ -515,34 +514,10 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, } EXPORT_SYMBOL(keyring_alloc); -/** - * keyring_restrict_trusted_only - Restrict additions to a keyring to trusted keys only - * @keyring: The keyring being added to. - * @type: The type of key being added. - * @flags: The key flags. - * @payload: The payload of the key intended to be added. - * - * Reject the addition of any links to a keyring that point to keys that aren't - * marked as being trusted. It can be overridden by passing - * KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when adding a key - * to a keyring. - * - * This is meant to be passed as the restrict_link parameter to - * keyring_alloc(). - */ -int keyring_restrict_trusted_only(struct key *keyring, - const struct key_type *type, - unsigned long flags, - const union key_payload *payload) -{ - return flags & KEY_FLAG_TRUSTED ? 0 : -EPERM; -} - /** * restrict_link_reject - Give -EPERM to restrict link * @keyring: The keyring being added to. * @type: The type of key being added. - * @flags: The key flags. * @payload: The payload of the key intended to be added. * * Reject the addition of any links to a keyring. It can be overridden by @@ -554,7 +529,6 @@ int keyring_restrict_trusted_only(struct key *keyring, */ int restrict_link_reject(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload) { return -EPERM; @@ -1248,8 +1222,7 @@ static int __key_link_check_restriction(struct key *keyring, struct key *key) { if (!keyring->restrict_link) return 0; - return keyring->restrict_link(keyring, - key->type, key->flags, &key->payload); + return keyring->restrict_link(keyring, key->type, &key->payload); } /** -- cgit v1.2.3 From 9a6f2b0113c8fce815db7c9d23754bdea4b428a0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 11 Apr 2016 21:40:05 +0200 Subject: net: mdio: Fix lockdep falls positive splat MDIO devices can be stacked upon each other. The current code supports two levels, which until recently has been enough for a DSA mdio bus on top of another bus. Now we have hardware which has an MDIO mux in the middle. Define an MDIO MUTEX class with three levels. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux.c | 10 ++-------- drivers/net/phy/mdio_bus.c | 4 ++-- include/linux/mdio.h | 11 +++++++++++ 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 308ade0eb1b6..5c81d6faf304 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -45,13 +45,7 @@ static int mdio_mux_read(struct mii_bus *bus, int phy_id, int regnum) struct mdio_mux_parent_bus *pb = cb->parent; int r; - /* In theory multiple mdio_mux could be stacked, thus creating - * more than a single level of nesting. But in practice, - * SINGLE_DEPTH_NESTING will cover the vast majority of use - * cases. We use it, instead of trying to handle the general - * case. - */ - mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&pb->mii_bus->mdio_lock, MDIO_MUTEX_MUX); r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); if (r) goto out; @@ -76,7 +70,7 @@ static int mdio_mux_write(struct mii_bus *bus, int phy_id, int r; - mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&pb->mii_bus->mdio_lock, MDIO_MUTEX_MUX); r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); if (r) goto out; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 0cba64f1ecf4..751202a285a6 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -457,7 +457,7 @@ int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum) BUG_ON(in_interrupt()); - mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); retval = bus->read(bus, addr, regnum); mutex_unlock(&bus->mdio_lock); @@ -509,7 +509,7 @@ int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val) BUG_ON(in_interrupt()); - mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); err = bus->write(bus, addr, regnum, val); mutex_unlock(&bus->mdio_lock); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 5bfd99d1a40a..bf9d1d750693 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -13,6 +13,17 @@ struct mii_bus; +/* Multiple levels of nesting are possible. However typically this is + * limited to nested DSA like layer, a MUX layer, and the normal + * user. Instead of trying to handle the general case, just define + * these cases. + */ +enum mdio_mutex_lock_class { + MDIO_MUTEX_NORMAL, + MDIO_MUTEX_MUX, + MDIO_MUTEX_NESTED, +}; + struct mdio_device { struct device dev; -- cgit v1.2.3 From 757d12e5849be549076901b0d33c60d5f360269c Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 12 Apr 2016 21:07:06 +0530 Subject: dmaengine: ensure dmaengine helpers check valid callback dmaengine has various device callbacks and exposes helper functions to invoke these. These helpers should check if channel, device and callback is valid or not before invoking them. Reported-by: Jon Hunter Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 017433712833..30de0197263a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -804,6 +804,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( sg_dma_address(&sg) = buf; sg_dma_len(&sg) = len; + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, &sg, 1, dir, flags, NULL); } @@ -812,6 +815,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction dir, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, NULL); } @@ -823,6 +829,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg( enum dma_transfer_direction dir, unsigned long flags, struct rio_dma_ext *rio_ext) { + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, rio_ext); } @@ -833,6 +842,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_dma_cyclic) + return NULL; + return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len, period_len, dir, flags); } @@ -841,6 +853,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_interleaved_dma) + return NULL; + return chan->device->device_prep_interleaved_dma(chan, xt, flags); } @@ -848,7 +863,7 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags) { - if (!chan || !chan->device) + if (!chan || !chan->device || !chan->device->device_prep_dma_memset) return NULL; return chan->device->device_prep_dma_memset(chan, dest, value, @@ -861,6 +876,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct scatterlist *src_sg, unsigned int src_nents, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_dma_sg) + return NULL; + return chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents, src_sg, src_nents, flags); } -- cgit v1.2.3 From 37e58237a16b94fcd2c2d1b7e9c6e1ca661c231b Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Tue, 22 Mar 2016 00:24:44 -0700 Subject: block: add offset in blk_add_request_payload() We could kmalloc() the payload, so need the offset in page. Signed-off-by: Ming Lin Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++-- drivers/block/skd_main.c | 2 +- drivers/scsi/sd.c | 2 +- include/linux/blkdev.h | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index b60537b2c35b..c50227796a26 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1523,6 +1523,7 @@ EXPORT_SYMBOL(blk_put_request); * blk_add_request_payload - add a payload to a request * @rq: request to update * @page: page backing the payload + * @offset: offset in page * @len: length of the payload. * * This allows to later add a payload to an already submitted request by @@ -1533,12 +1534,12 @@ EXPORT_SYMBOL(blk_put_request); * discard requests should ever use it. */ void blk_add_request_payload(struct request *rq, struct page *page, - unsigned int len) + int offset, unsigned int len) { struct bio *bio = rq->bio; bio->bi_io_vec->bv_page = page; - bio->bi_io_vec->bv_offset = 0; + bio->bi_io_vec->bv_offset = offset; bio->bi_io_vec->bv_len = len; bio->bi_iter.bi_size = len; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 586f9168ffa4..9a9ec212fab8 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -562,7 +562,7 @@ skd_prep_discard_cdb(struct skd_scsi_request *scsi_req, put_unaligned_be32(count, &buf[16]); req = skreq->req; - blk_add_request_payload(req, page, len); + blk_add_request_payload(req, page, 0, len); } static void skd_request_fn_not_online(struct request_queue *q); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f52b74cf8d1e..69b0a4a7a15f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -779,7 +779,7 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) * discarded on disk. This allows us to report completion on the full * amount of blocks described by the request. */ - blk_add_request_payload(rq, page, len); + blk_add_request_payload(rq, page, 0, len); ret = scsi_init_io(cmd); rq->__data_len = nr_bytes; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 669e419d6234..bbaa76757018 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -779,7 +779,7 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, - unsigned int len); + int offset, unsigned int len); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, -- cgit v1.2.3 From e0489487ec9cd79ee1fa0dc5d3789c08b0e51a2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 10 Mar 2016 13:58:46 +0200 Subject: blk-mq: Export tagset iter function Its useful to iterate on all the active tags in cases where we will need to fail all the queues IO. Signed-off-by: Sagi Grimberg [hch: carefully check for valid tagsets] Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 12 ++++++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index abdbb47405cb..2fd04286f103 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -474,6 +474,18 @@ void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, } EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); +void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, + busy_tag_iter_fn *fn, void *priv) +{ + int i; + + for (i = 0; i < tagset->nr_hw_queues; i++) { + if (tagset->tags && tagset->tags[i]) + blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv); + } +} +EXPORT_SYMBOL(blk_mq_tagset_busy_iter); + void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9ac9799b702b..c808fec1ce44 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -240,6 +240,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); +void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, + busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); -- cgit v1.2.3 From e8f1e1630b0a98685d1a3521e8aba0dc7e68082c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 10 Mar 2016 13:58:49 +0200 Subject: blk-mq: Make blk_mq_all_tag_busy_iter static No caller outside the blk-mq code so we can settle with it static. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 5 ++--- include/linux/blk-mq.h | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 2fd04286f103..56a0c37a3d06 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -464,15 +464,14 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, } } -void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, - void *priv) +static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, + busy_tag_iter_fn *fn, void *priv) { if (tags->nr_reserved_tags) bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true); bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, false); } -EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c808fec1ce44..2498fdf3a503 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,8 +238,6 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, - void *priv); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); -- cgit v1.2.3 From 9fd4dcece43a53e5a9e65a973df5693702ee6401 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:13 +0100 Subject: debugfs: prevent access to possibly dead file_operations at file open Nothing prevents a dentry found by path lookup before a return of __debugfs_remove() to actually get opened after that return. Now, after the return of __debugfs_remove(), there are no guarantees whatsoever regarding the memory the corresponding inode's file_operations object had been kept in. Since __debugfs_remove() is seldomly invoked, usually from module exit handlers only, the race is hard to trigger and the impact is very low. A discussion of the problem outlined above as well as a suggested solution can be found in the (sub-)thread rooted at http://lkml.kernel.org/g/20130401203445.GA20862@ZenIV.linux.org.uk ("Yet another pipe related oops.") Basically, Greg KH suggests to introduce an intermediate fops and Al Viro points out that a pointer to the original ones may be stored in ->d_fsdata. Follow this line of reasoning: - Add SRCU as a reverse dependency of DEBUG_FS. - Introduce a srcu_struct object for the debugfs subsystem. - In debugfs_create_file(), store a pointer to the original file_operations object in ->d_fsdata. - Make debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace period after the dentry has been delete()'d and before they return to their callers. - Introduce an intermediate file_operations object named "debugfs_open_proxy_file_operations". It's ->open() functions checks, under the protection of a SRCU read lock, whether the dentry is still alive, i.e. has not been d_delete()'d and if so, tries to acquire a reference on the owning module. On success, it sets the file object's ->f_op to the original file_operations and forwards the ongoing open() call to the original ->open(). - For clarity, rename the former debugfs_file_operations to debugfs_noop_file_operations -- they are in no way canonical. The choice of SRCU over "normal" RCU is justified by the fact, that the former may also be used to protect ->i_private data from going away during the execution of a file's readers and writers which may (and do) sleep. Finally, introduce the fs/debugfs/internal.h header containing some declarations internal to the debugfs implementation. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/debugfs/inode.c | 13 ++++++- fs/debugfs/internal.h | 24 +++++++++++++ include/linux/debugfs.h | 3 -- lib/Kconfig.debug | 1 + 5 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 fs/debugfs/internal.h (limited to 'include/linux') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index d2ba12e23ed9..736ab3c988f2 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -22,6 +22,9 @@ #include #include #include +#include + +#include "internal.h" static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -35,13 +38,99 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, return count; } -const struct file_operations debugfs_file_operations = { +const struct file_operations debugfs_noop_file_operations = { .read = default_read_file, .write = default_write_file, .open = simple_open, .llseek = noop_llseek, }; +/** + * debugfs_use_file_start - mark the beginning of file data access + * @dentry: the dentry object whose data is being accessed. + * @srcu_idx: a pointer to some memory to store a SRCU index in. + * + * Up to a matching call to debugfs_use_file_finish(), any + * successive call into the file removing functions debugfs_remove() + * and debugfs_remove_recursive() will block. Since associated private + * file data may only get freed after a successful return of any of + * the removal functions, you may safely access it after a successful + * call to debugfs_use_file_start() without worrying about + * lifetime issues. + * + * If -%EIO is returned, the file has already been removed and thus, + * it is not safe to access any of its data. If, on the other hand, + * it is allowed to access the file data, zero is returned. + * + * Regardless of the return code, any call to + * debugfs_use_file_start() must be followed by a matching call + * to debugfs_use_file_finish(). + */ +static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) + __acquires(&debugfs_srcu) +{ + *srcu_idx = srcu_read_lock(&debugfs_srcu); + barrier(); + if (d_unlinked(dentry)) + return -EIO; + return 0; +} + +/** + * debugfs_use_file_finish - mark the end of file data access + * @srcu_idx: the SRCU index "created" by a former call to + * debugfs_use_file_start(). + * + * Allow any ongoing concurrent call into debugfs_remove() or + * debugfs_remove_recursive() blocked by a former call to + * debugfs_use_file_start() to proceed and return to its caller. + */ +static void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) +{ + srcu_read_unlock(&debugfs_srcu, srcu_idx); +} + +#define F_DENTRY(filp) ((filp)->f_path.dentry) + +#define REAL_FOPS_DEREF(dentry) \ + ((const struct file_operations *)(dentry)->d_fsdata) + +static int open_proxy_open(struct inode *inode, struct file *filp) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = NULL; + int srcu_idx, r; + + r = debugfs_use_file_start(dentry, &srcu_idx); + if (r) { + r = -ENOENT; + goto out; + } + + real_fops = REAL_FOPS_DEREF(dentry); + real_fops = fops_get(real_fops); + if (!real_fops) { + /* Huh? Module did not clean up after itself at exit? */ + WARN(1, "debugfs file owner did not clean up at exit: %pd", + dentry); + r = -ENXIO; + goto out; + } + replace_fops(filp, real_fops); + + if (real_fops->open) + r = real_fops->open(inode, filp); + +out: + fops_put(real_fops); + debugfs_use_file_finish(srcu_idx); + return r; +} + +const struct file_operations debugfs_open_proxy_file_operations = { + .open = open_proxy_open, +}; + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b1e7f35f3cd4..2905dd160575 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -27,9 +27,14 @@ #include #include #include +#include + +#include "internal.h" #define DEBUGFS_DEFAULT_MODE 0700 +DEFINE_SRCU(debugfs_srcu); + static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; @@ -341,8 +346,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, return failed_creating(dentry); inode->i_mode = mode; - inode->i_fop = fops ? fops : &debugfs_file_operations; inode->i_private = data; + + inode->i_fop = fops ? &debugfs_open_proxy_file_operations + : &debugfs_noop_file_operations; + dentry->d_fsdata = (void *)fops; + d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); @@ -570,6 +579,7 @@ void debugfs_remove(struct dentry *dentry) inode_unlock(d_inode(parent)); if (!ret) simple_release_fs(&debugfs_mount, &debugfs_mount_count); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove); @@ -647,6 +657,7 @@ void debugfs_remove_recursive(struct dentry *dentry) if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); inode_unlock(d_inode(parent)); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h new file mode 100644 index 000000000000..c7aaa5cb6685 --- /dev/null +++ b/fs/debugfs/internal.h @@ -0,0 +1,24 @@ +/* + * internal.h - declarations internal to debugfs + * + * Copyright (C) 2016 Nicolai Stange + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + */ + +#ifndef _DEBUGFS_INTERNAL_H_ +#define _DEBUGFS_INTERNAL_H_ + +struct file_operations; +struct srcu_struct; + +/* declared over in file.c */ +extern const struct file_operations debugfs_noop_file_operations; +extern const struct file_operations debugfs_open_proxy_file_operations; + +extern struct srcu_struct debugfs_srcu; + +#endif /* _DEBUGFS_INTERNAL_H_ */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 981e53ab84e8..fcafe2d389f9 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -43,9 +43,6 @@ extern struct dentry *arch_debugfs_dir; #if defined(CONFIG_DEBUG_FS) -/* declared over in file.c */ -extern const struct file_operations debugfs_file_operations; - struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1e9a607534ca..ddb0e8337aae 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -257,6 +257,7 @@ config PAGE_OWNER config DEBUG_FS bool "Debug Filesystem" + select SRCU help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and -- cgit v1.2.3 From 49d200deaa680501f19a247b1fffb29301e51d2b Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:14 +0100 Subject: debugfs: prevent access to removed files' private data Upon return of debugfs_remove()/debugfs_remove_recursive(), it might still be attempted to access associated private file data through previously opened struct file objects. If that data has been freed by the caller of debugfs_remove*() in the meanwhile, the reading/writing process would either encounter a fault or, if the memory address in question has been reassigned again, unrelated data structures could get overwritten. However, since debugfs files are seldomly removed, usually from module exit handlers only, the impact is very low. Currently, there are ~1000 call sites of debugfs_create_file() spread throughout the whole tree and touching all of those struct file_operations in order to make them file removal aware by means of checking the result of debugfs_use_file_start() from within their methods is unfeasible. Instead, wrap the struct file_operations by a lifetime managing proxy at file open: - In debugfs_create_file(), the original fops handed in has got stashed away in ->d_fsdata already. - In debugfs_create_file(), install a proxy file_operations factory, debugfs_full_proxy_file_operations, at ->i_fop. This proxy factory has got an ->open() method only. It carries out some lifetime checks and if successful, dynamically allocates and sets up a new struct file_operations proxy at ->f_op. Afterwards, it forwards to the ->open() of the original struct file_operations in ->d_fsdata, if any. The dynamically set up proxy at ->f_op has got a lifetime managing wrapper set for each of the methods defined in the original struct file_operations in ->d_fsdata. Its ->release()er frees the proxy again and forwards to the original ->release(), if any. In order not to mislead the VFS layer, it is strictly necessary to leave those fields blank in the proxy that have been NULL in the original struct file_operations also, i.e. aren't supported. This is why there is a need for dynamically allocated proxies. The choice made not to allocate a proxy instance for every dentry at file creation, but for every struct file object instantiated thereof is justified by the expected usage pattern of debugfs, namely that in general very few files get opened more than once at a time. The wrapper methods set in the struct file_operations implement lifetime managing by means of the SRCU protection facilities already in place for debugfs: They set up a SRCU read side critical section and check whether the dentry is still alive by means of debugfs_use_file_start(). If so, they forward the call to the original struct file_operation stored in ->d_fsdata, still under the protection of the SRCU read side critical section. This SRCU read side critical section prevents any pending debugfs_remove() and friends to return to their callers. Since a file's private data must only be freed after the return of debugfs_remove(), the ongoing proxied call is guarded against any file removal race. If, on the other hand, the initial call to debugfs_use_file_start() detects that the dentry is dead, the wrapper simply returns -EIO and does not forward the call. Note that the ->poll() wrapper is special in that its signature does not allow for the return of arbitrary -EXXX values and thus, POLLHUP is returned here. In order not to pollute debugfs with wrapper definitions that aren't ever needed, I chose not to define a wrapper for every struct file_operations method possible. Instead, a wrapper is defined only for the subset of methods which are actually set by any debugfs users. Currently, these are: ->llseek() ->read() ->write() ->unlocked_ioctl() ->poll() The ->release() wrapper is special in that it does not protect the original ->release() in any way from dead files in order not to leak resources. Thus, any ->release() handed to debugfs must implement file lifetime management manually, if needed. For only 33 out of a total of 434 releasers handed in to debugfs, it could not be verified immediately whether they access data structures that might have been freed upon a debugfs_remove() return in the meanwhile. Export debugfs_use_file_start() and debugfs_use_file_finish() in order to allow any ->release() to manually implement file lifetime management. For a set of common cases of struct file_operations implemented by the debugfs_core itself, future patches will incorporate file lifetime management directly within those in order to allow for their unproxied operation. Rename the original, non-proxying "debugfs_create_file()" to "debugfs_create_file_unsafe()" and keep it for future internal use by debugfs itself. Factor out code common to both into the new __debugfs_create_file(). Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++- fs/debugfs/inode.c | 70 ++++++++++++++------- fs/debugfs/internal.h | 6 +- include/linux/debugfs.h | 20 ++++++ 4 files changed, 226 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 736ab3c988f2..6eb58a8ed03c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -23,9 +23,12 @@ #include #include #include +#include #include "internal.h" +struct poll_table_struct; + static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -66,7 +69,7 @@ const struct file_operations debugfs_noop_file_operations = { * debugfs_use_file_start() must be followed by a matching call * to debugfs_use_file_finish(). */ -static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) +int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) __acquires(&debugfs_srcu) { *srcu_idx = srcu_read_lock(&debugfs_srcu); @@ -75,6 +78,7 @@ static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) return -EIO; return 0; } +EXPORT_SYMBOL_GPL(debugfs_use_file_start); /** * debugfs_use_file_finish - mark the end of file data access @@ -85,10 +89,11 @@ static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) * debugfs_remove_recursive() blocked by a former call to * debugfs_use_file_start() to proceed and return to its caller. */ -static void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) +void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { srcu_read_unlock(&debugfs_srcu, srcu_idx); } +EXPORT_SYMBOL_GPL(debugfs_use_file_finish); #define F_DENTRY(filp) ((filp)->f_path.dentry) @@ -131,6 +136,154 @@ const struct file_operations debugfs_open_proxy_file_operations = { .open = open_proxy_open, }; +#define PROTO(args...) args +#define ARGS(args...) args + +#define FULL_PROXY_FUNC(name, ret_type, filp, proto, args) \ +static ret_type full_proxy_ ## name(proto) \ +{ \ + const struct dentry *dentry = F_DENTRY(filp); \ + const struct file_operations *real_fops = \ + REAL_FOPS_DEREF(dentry); \ + int srcu_idx; \ + ret_type r; \ + \ + r = debugfs_use_file_start(dentry, &srcu_idx); \ + if (likely(!r)) \ + r = real_fops->name(args); \ + debugfs_use_file_finish(srcu_idx); \ + return r; \ +} + +FULL_PROXY_FUNC(llseek, loff_t, filp, + PROTO(struct file *filp, loff_t offset, int whence), + ARGS(filp, offset, whence)); + +FULL_PROXY_FUNC(read, ssize_t, filp, + PROTO(struct file *filp, char __user *buf, size_t size, + loff_t *ppos), + ARGS(filp, buf, size, ppos)); + +FULL_PROXY_FUNC(write, ssize_t, filp, + PROTO(struct file *filp, const char __user *buf, size_t size, + loff_t *ppos), + ARGS(filp, buf, size, ppos)); + +FULL_PROXY_FUNC(unlocked_ioctl, long, filp, + PROTO(struct file *filp, unsigned int cmd, unsigned long arg), + ARGS(filp, cmd, arg)); + +static unsigned int full_proxy_poll(struct file *filp, + struct poll_table_struct *wait) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry); + int srcu_idx; + unsigned int r = 0; + + if (debugfs_use_file_start(dentry, &srcu_idx)) { + debugfs_use_file_finish(srcu_idx); + return POLLHUP; + } + + r = real_fops->poll(filp, wait); + debugfs_use_file_finish(srcu_idx); + return r; +} + +static int full_proxy_release(struct inode *inode, struct file *filp) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry); + const struct file_operations *proxy_fops = filp->f_op; + int r = 0; + + /* + * We must not protect this against removal races here: the + * original releaser should be called unconditionally in order + * not to leak any resources. Releasers must not assume that + * ->i_private is still being meaningful here. + */ + if (real_fops->release) + r = real_fops->release(inode, filp); + + replace_fops(filp, d_inode(dentry)->i_fop); + kfree((void *)proxy_fops); + fops_put(real_fops); + return 0; +} + +static void __full_proxy_fops_init(struct file_operations *proxy_fops, + const struct file_operations *real_fops) +{ + proxy_fops->release = full_proxy_release; + if (real_fops->llseek) + proxy_fops->llseek = full_proxy_llseek; + if (real_fops->read) + proxy_fops->read = full_proxy_read; + if (real_fops->write) + proxy_fops->write = full_proxy_write; + if (real_fops->poll) + proxy_fops->poll = full_proxy_poll; + if (real_fops->unlocked_ioctl) + proxy_fops->unlocked_ioctl = full_proxy_unlocked_ioctl; +} + +static int full_proxy_open(struct inode *inode, struct file *filp) +{ + const struct dentry *dentry = F_DENTRY(filp); + const struct file_operations *real_fops = NULL; + struct file_operations *proxy_fops = NULL; + int srcu_idx, r; + + r = debugfs_use_file_start(dentry, &srcu_idx); + if (r) { + r = -ENOENT; + goto out; + } + + real_fops = REAL_FOPS_DEREF(dentry); + real_fops = fops_get(real_fops); + if (!real_fops) { + /* Huh? Module did not cleanup after itself at exit? */ + WARN(1, "debugfs file owner did not clean up at exit: %pd", + dentry); + r = -ENXIO; + goto out; + } + + proxy_fops = kzalloc(sizeof(*proxy_fops), GFP_KERNEL); + if (!proxy_fops) { + r = -ENOMEM; + goto free_proxy; + } + __full_proxy_fops_init(proxy_fops, real_fops); + replace_fops(filp, proxy_fops); + + if (real_fops->open) { + r = real_fops->open(inode, filp); + + if (filp->f_op != proxy_fops) { + /* No protection against file removal anymore. */ + WARN(1, "debugfs file owner replaced proxy fops: %pd", + dentry); + goto free_proxy; + } + } + + goto out; +free_proxy: + kfree(proxy_fops); + fops_put(real_fops); +out: + debugfs_use_file_finish(srcu_idx); + return r; +} + +const struct file_operations debugfs_full_proxy_file_operations = { + .open = full_proxy_open, +}; + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2905dd160575..136f269f01de 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -300,6 +300,37 @@ static struct dentry *end_creating(struct dentry *dentry) return dentry; } +static struct dentry *__debugfs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *proxy_fops, + const struct file_operations *real_fops) +{ + struct dentry *dentry; + struct inode *inode; + + if (!(mode & S_IFMT)) + mode |= S_IFREG; + BUG_ON(!S_ISREG(mode)); + dentry = start_creating(name, parent); + + if (IS_ERR(dentry)) + return NULL; + + inode = debugfs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = mode; + inode->i_private = data; + + inode->i_fop = proxy_fops; + dentry->d_fsdata = (void *)real_fops; + + d_instantiate(dentry, inode); + fsnotify_create(d_inode(dentry->d_parent), dentry); + return end_creating(dentry); +} + /** * debugfs_create_file - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. @@ -330,33 +361,24 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { - struct dentry *dentry; - struct inode *inode; - - if (!(mode & S_IFMT)) - mode |= S_IFREG; - BUG_ON(!S_ISREG(mode)); - dentry = start_creating(name, parent); - - if (IS_ERR(dentry)) - return NULL; - - inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) - return failed_creating(dentry); - inode->i_mode = mode; - inode->i_private = data; + return __debugfs_create_file(name, mode, parent, data, + fops ? &debugfs_full_proxy_file_operations : + &debugfs_noop_file_operations, + fops); +} +EXPORT_SYMBOL_GPL(debugfs_create_file); - inode->i_fop = fops ? &debugfs_open_proxy_file_operations - : &debugfs_noop_file_operations; - dentry->d_fsdata = (void *)fops; +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ - d_instantiate(dentry, inode); - fsnotify_create(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return __debugfs_create_file(name, mode, parent, data, + fops ? &debugfs_open_proxy_file_operations : + &debugfs_noop_file_operations, + fops); } -EXPORT_SYMBOL_GPL(debugfs_create_file); /** * debugfs_create_file_size - create a file in the debugfs filesystem @@ -579,6 +601,7 @@ void debugfs_remove(struct dentry *dentry) inode_unlock(d_inode(parent)); if (!ret) simple_release_fs(&debugfs_mount, &debugfs_mount_count); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove); @@ -657,6 +680,7 @@ void debugfs_remove_recursive(struct dentry *dentry) if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); inode_unlock(d_inode(parent)); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index c7aaa5cb6685..bba52634b995 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -13,12 +13,14 @@ #define _DEBUGFS_INTERNAL_H_ struct file_operations; -struct srcu_struct; /* declared over in file.c */ extern const struct file_operations debugfs_noop_file_operations; extern const struct file_operations debugfs_open_proxy_file_operations; +extern const struct file_operations debugfs_full_proxy_file_operations; -extern struct srcu_struct debugfs_srcu; +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); #endif /* _DEBUGFS_INTERNAL_H_ */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index fcafe2d389f9..a63e6ea3321c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -19,9 +19,11 @@ #include #include +#include struct device; struct file_operations; +struct srcu_struct; struct debugfs_blob_wrapper { void *data; @@ -41,6 +43,8 @@ struct debugfs_regset32 { extern struct dentry *arch_debugfs_dir; +extern struct srcu_struct debugfs_srcu; + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, @@ -65,6 +69,11 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); void debugfs_remove_recursive(struct dentry *dentry); +int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) + __acquires(&debugfs_srcu); + +void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -173,6 +182,17 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } +static inline int debugfs_use_file_start(const struct dentry *dentry, + int *srcu_idx) + __acquires(&debugfs_srcu) +{ + return 0; +} + +static inline void debugfs_use_file_finish(int srcu_idx) + __releases(&debugfs_srcu) +{ } + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { -- cgit v1.2.3 From c64688081490321f2d23a292ef24e60bb321f3f1 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:15 +0100 Subject: debugfs: add support for self-protecting attribute file fops In order to protect them against file removal issues, debugfs_create_file() creates a lifetime managing proxy around each struct file_operations handed in. In cases where this struct file_operations is able to manage file lifetime by itself already, the proxy created by debugfs is a waste of resources. The most common class of struct file_operations given to debugfs are those defined by means of the DEFINE_SIMPLE_ATTRIBUTE() macro. Introduce a DEFINE_DEBUGFS_ATTRIBUTE() macro to allow any struct file_operations of this class to be easily made file lifetime aware and thus, to be operated unproxied. Specifically, introduce debugfs_attr_read() and debugfs_attr_write() which wrap simple_attr_read() and simple_attr_write() under the protection of a debugfs_use_file_start()/debugfs_use_file_finish() pair. Make DEFINE_DEBUGFS_ATTRIBUTE() set the defined struct file_operations' ->read() and ->write() members to these wrappers. Export debugfs_create_file_unsafe() in order to allow debugfs users to create their files in non-proxying operation mode. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 28 ++++++++++++++++++++++++++++ fs/debugfs/inode.c | 28 ++++++++++++++++++++++++++++ include/linux/debugfs.h | 26 ++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) (limited to 'include/linux') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 6eb58a8ed03c..8ef56d9499a4 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -284,6 +284,34 @@ const struct file_operations debugfs_full_proxy_file_operations = { .open = full_proxy_open, }; +ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret; + int srcu_idx; + + ret = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!ret)) + ret = simple_attr_read(file, buf, len, ppos); + debugfs_use_file_finish(srcu_idx); + return ret; +} +EXPORT_SYMBOL_GPL(debugfs_attr_read); + +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret; + int srcu_idx; + + ret = debugfs_use_file_start(F_DENTRY(file), &srcu_idx); + if (likely(!ret)) + ret = simple_attr_write(file, buf, len, ppos); + debugfs_use_file_finish(srcu_idx); + return ret; +} +EXPORT_SYMBOL_GPL(debugfs_attr_write); + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 136f269f01de..41e079a8da26 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -369,6 +369,33 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_file); +/** + * debugfs_create_file_unsafe - create a file in the debugfs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the debugfs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations that should be used for + * this file. + * + * debugfs_create_file_unsafe() is completely analogous to + * debugfs_create_file(), the only difference being that the fops + * handed it will not get protected against file removals by the + * debugfs core. + * + * It is your responsibility to protect your struct file_operation + * methods against file removals by means of debugfs_use_file_start() + * and debugfs_use_file_finish(). ->open() is still protected by + * debugfs though. + * + * Any struct file_operations defined by means of + * DEFINE_DEBUGFS_ATTRIBUTE() is protected against file removals and + * thus, may be used here. + */ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) @@ -379,6 +406,7 @@ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, &debugfs_noop_file_operations, fops); } +EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); /** * debugfs_create_file_size - create a file in the debugfs filesystem diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a63e6ea3321c..1438e2322d5c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -50,6 +50,9 @@ extern struct srcu_struct debugfs_srcu; struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); struct dentry *debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -74,6 +77,26 @@ int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); +ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos); +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); + +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = debugfs_attr_write, \ + .llseek = generic_file_llseek, \ +} + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -193,6 +216,9 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ + static const struct file_operations __fops = { 0 } + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { -- cgit v1.2.3 From 93e9d8e836cb1a9a58b33eb6643bf061c6119ef2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2016 12:32:46 -0600 Subject: block: add ability to flag write back caching on a device Add an internal helper and flag for setting whether a queue has write back caching, or write through (or none). Add a sysfs file to show this as well, and make it changeable from user space. This will replace the (awkward) blk_queue_flush() interface that drivers currently use to inform the block layer of write cache state and capabilities. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- Documentation/block/queue-sysfs.txt | 9 +++++++++ block/blk-settings.c | 26 +++++++++++++++++++++++++ block/blk-sysfs.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 3 +++ 4 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index e5d914845be6..dce25d848d92 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -141,6 +141,15 @@ control of this block device to that new IO scheduler. Note that writing an IO scheduler name to this file will attempt to load that IO scheduler module, if it isn't already present in the system. +write_cache (RW) +---------------- +When read, this file will display whether the device has write back +caching enabled or not. It will return "write back" for the former +case, and "write through" for the latter. Writing to this file can +change the kernels view of the device, but it doesn't alter the +device state. This means that it might not be safe to toggle the +setting from "write back" to "write through", since that will also +eliminate cache flushes issued by the kernel. Jens Axboe , February 2009 diff --git a/block/blk-settings.c b/block/blk-settings.c index 331e4eee0dda..c903bee43cf8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -846,6 +846,32 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable) } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); +/** + * blk_queue_write_cache - configure queue's write cache + * @q: the request queue for the device + * @wc: write back cache on or off + * @fua: device supports FUA writes, if true + * + * Tell the block layer about the write cache of @q. + */ +void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) +{ + spin_lock_irq(q->queue_lock); + if (wc) { + queue_flag_set(QUEUE_FLAG_WC, q); + q->flush_flags = REQ_FLUSH; + } else + queue_flag_clear(QUEUE_FLAG_WC, q); + if (fua) { + if (wc) + q->flush_flags |= REQ_FUA; + queue_flag_set(QUEUE_FLAG_FUA, q); + } else + queue_flag_clear(QUEUE_FLAG_FUA, q); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(blk_queue_write_cache); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 995b58d46ed1..99205965f559 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -347,6 +347,38 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, return ret; } +static ssize_t queue_wc_show(struct request_queue *q, char *page) +{ + if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + return sprintf(page, "write back\n"); + + return sprintf(page, "write through\n"); +} + +static ssize_t queue_wc_store(struct request_queue *q, const char *page, + size_t count) +{ + int set = -1; + + if (!strncmp(page, "write back", 10)) + set = 1; + else if (!strncmp(page, "write through", 13) || + !strncmp(page, "none", 4)) + set = 0; + + if (set == -1) + return -EINVAL; + + spin_lock_irq(q->queue_lock); + if (set) + queue_flag_set(QUEUE_FLAG_WC, q); + else + queue_flag_clear(QUEUE_FLAG_WC, q); + spin_unlock_irq(q->queue_lock); + + return count; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -478,6 +510,12 @@ static struct queue_sysfs_entry queue_poll_entry = { .store = queue_poll_store, }; +static struct queue_sysfs_entry queue_wc_entry = { + .attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR }, + .show = queue_wc_show, + .store = queue_wc_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -503,6 +541,7 @@ static struct attribute *default_attrs[] = { &queue_iostats_entry.attr, &queue_random_entry.attr, &queue_poll_entry.attr, + &queue_wc_entry.attr, NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bbaa76757018..ba72687c5654 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -491,6 +491,8 @@ struct request_queue { #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ #define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ +#define QUEUE_FLAG_WC 23 /* Write back caching */ +#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -1009,6 +1011,7 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); +extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); -- cgit v1.2.3 From 2245f6de6c68b225986229a2de78c240536f7f38 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Mar 2016 10:19:30 -0600 Subject: block: kill blk_queue_flush() We don't have any drivers left using it, so kill it off. Update documentation to use the newer blk_queue_write_cache(). Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- Documentation/block/writeback_cache_control.txt | 4 ++-- block/blk-settings.c | 20 -------------------- include/linux/blkdev.h | 1 - 3 files changed, 2 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt index 83407d36630a..59e0516cbf6b 100644 --- a/Documentation/block/writeback_cache_control.txt +++ b/Documentation/block/writeback_cache_control.txt @@ -71,7 +71,7 @@ requests that have a payload. For devices with volatile write caches the driver needs to tell the block layer that it supports flushing caches by doing: - blk_queue_flush(sdkp->disk->queue, REQ_FLUSH); + blk_queue_write_cache(sdkp->disk->queue, true, false); and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that REQ_FLUSH requests with a payload are automatically turned into a sequence @@ -79,7 +79,7 @@ of an empty REQ_FLUSH request followed by the actual write by the block layer. For devices that also support the FUA bit the block layer needs to be told to pass through the REQ_FUA bit using: - blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(sdkp->disk->queue, true, true); and the driver must handle write requests that have the REQ_FUA bit set in prep_fn/request_fn. If the FUA bit is not natively supported the block diff --git a/block/blk-settings.c b/block/blk-settings.c index c903bee43cf8..80d9327a214b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -820,26 +820,6 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); -/** - * blk_queue_flush - configure queue's cache flush capability - * @q: the request queue for the device - * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA - * - * Tell block layer cache flush capability of @q. If it supports - * flushing, REQ_FLUSH should be set. If it supports bypassing - * write cache for individual writes, REQ_FUA should be set. - */ -void blk_queue_flush(struct request_queue *q, unsigned int flush) -{ - WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); - - if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) - flush &= ~REQ_FUA; - - q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); -} -EXPORT_SYMBOL_GPL(blk_queue_flush); - void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { q->flush_not_queueable = !queueable; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba72687c5654..f3f232fa505d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1009,7 +1009,6 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From 0c034fe37718990e0ffdd9622bd6cc5b4f93111f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 12 Apr 2016 17:46:39 -0300 Subject: mtd: Uninline mtd_write_oob and move it to mtdcore.c There's no reason for having mtd_write_oob inlined in mtd.h header. Move it to mtdcore.c where it belongs. Signed-off-by: Ezequiel Garcia Acked-by: Boris Brezillon Signed-off-by: Jacek Anaszewski --- drivers/mtd/mtdcore.c | 12 ++++++++++++ include/linux/mtd/mtd.h | 12 +----------- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 309625130b21..99d83f3331b0 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -997,6 +997,18 @@ int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) } EXPORT_SYMBOL_GPL(mtd_read_oob); +int mtd_write_oob(struct mtd_info *mtd, loff_t to, + struct mtd_oob_ops *ops) +{ + ops->retlen = ops->oobretlen = 0; + if (!mtd->_write_oob) + return -EOPNOTSUPP; + if (!(mtd->flags & MTD_WRITEABLE)) + return -EROFS; + return mtd->_write_oob(mtd, to, ops); +} +EXPORT_SYMBOL_GPL(mtd_write_oob); + /* * Method to access the protection register area, present in some flash * devices. The user data is one time programmable but the factory data is read diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 771272187316..ef9fea4fc400 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -283,17 +283,7 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf); int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops); - -static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to, - struct mtd_oob_ops *ops) -{ - ops->retlen = ops->oobretlen = 0; - if (!mtd->_write_oob) - return -EOPNOTSUPP; - if (!(mtd->flags & MTD_WRITEABLE)) - return -EROFS; - return mtd->_write_oob(mtd, to, ops); -} +int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops); int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf); -- cgit v1.2.3 From 4b721174c7976f26300e39aa42714fb5a54bebb5 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 12 Apr 2016 17:46:40 -0300 Subject: leds: trigger: Introduce a MTD (NAND/NOR) trigger This commit introduces a MTD trigger for flash (NAND/NOR) device activity. The implementation is copied from IDE disk. This trigger deprecates the "nand-disk" LED trigger, but for backwards compatibility, we still keep the "nand-disk" trigger around. The motivation for deprecating the "nand-disk" LED trigger is that it only works for NAND drivers, whereas the "mtd" LED trigger is more generic (in fact, "nand-disk" currently only works for certain NAND drivers). Signed-off-by: Ezequiel Garcia Acked-by: Boris Brezillon Signed-off-by: Jacek Anaszewski --- drivers/leds/trigger/Kconfig | 8 +++++++ drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-mtd.c | 45 ++++++++++++++++++++++++++++++++++++++ include/linux/leds.h | 6 +++++ 4 files changed, 60 insertions(+) create mode 100644 drivers/leds/trigger/ledtrig-mtd.c (limited to 'include/linux') diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index 554f5bfbeced..beac8c31c51b 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -41,6 +41,14 @@ config LEDS_TRIGGER_IDE_DISK This allows LEDs to be controlled by IDE disk activity. If unsure, say Y. +config LEDS_TRIGGER_MTD + bool "LED MTD (NAND/NOR) Trigger" + depends on MTD + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by MTD activity. + If unsure, say N. + config LEDS_TRIGGER_HEARTBEAT tristate "LED Heartbeat Trigger" depends on LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile index 547bf5c80e52..8cc64a4f4e25 100644 --- a/drivers/leds/trigger/Makefile +++ b/drivers/leds/trigger/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o obj-$(CONFIG_LEDS_TRIGGER_ONESHOT) += ledtrig-oneshot.o obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o +obj-$(CONFIG_LEDS_TRIGGER_MTD) += ledtrig-mtd.o obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o diff --git a/drivers/leds/trigger/ledtrig-mtd.c b/drivers/leds/trigger/ledtrig-mtd.c new file mode 100644 index 000000000000..99b5b0a4d826 --- /dev/null +++ b/drivers/leds/trigger/ledtrig-mtd.c @@ -0,0 +1,45 @@ +/* + * LED MTD trigger + * + * Copyright 2016 Ezequiel Garcia + * + * Based on LED IDE-Disk Activity Trigger + * + * Copyright 2006 Openedhand Ltd. + * + * Author: Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +#define BLINK_DELAY 30 + +DEFINE_LED_TRIGGER(ledtrig_mtd); +DEFINE_LED_TRIGGER(ledtrig_nand); + +void ledtrig_mtd_activity(void) +{ + unsigned long blink_delay = BLINK_DELAY; + + led_trigger_blink_oneshot(ledtrig_mtd, + &blink_delay, &blink_delay, 0); + led_trigger_blink_oneshot(ledtrig_nand, + &blink_delay, &blink_delay, 0); +} +EXPORT_SYMBOL(ledtrig_mtd_activity); + +static int __init ledtrig_mtd_init(void) +{ + led_trigger_register_simple("mtd", &ledtrig_mtd); + led_trigger_register_simple("nand-disk", &ledtrig_nand); + + return 0; +} +device_initcall(ledtrig_mtd_init); diff --git a/include/linux/leds.h b/include/linux/leds.h index f203a8f89d30..19eb10278bea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -329,6 +329,12 @@ extern void ledtrig_ide_activity(void); static inline void ledtrig_ide_activity(void) {} #endif +#ifdef CONFIG_LEDS_TRIGGER_MTD +extern void ledtrig_mtd_activity(void); +#else +static inline void ledtrig_mtd_activity(void) {} +#endif + #if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE) extern void ledtrig_flash_ctrl(bool on); extern void ledtrig_torch_ctrl(bool on); -- cgit v1.2.3 From bc405cd69a728d0a82bae8395fe43ec7b0afd1c6 Mon Sep 17 00:00:00 2001 From: Alexandre Macabies Date: Tue, 12 Apr 2016 18:53:00 +0200 Subject: ieee802154: add security bit check function ieee802154_is_secen checks if the 802.15.4 security bit is set in the frame control field. Signed-off-by: Alexander Aring Signed-off-by: Alexandre Macabies Reviewed-by: Stefan Schmidt Acked-by: Alan Ott Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index d3e415674dac..56090f195339 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -218,6 +218,7 @@ enum { /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 +#define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FTYPE_DATA 0x0001 @@ -232,6 +233,15 @@ static inline int ieee802154_is_data(__le16 fc) cpu_to_le16(IEEE802154_FTYPE_DATA); } +/** + * ieee802154_is_secen - check if Security bit is set + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_secen(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_SECEN); +} + /** * ieee802154_is_ackreq - check if acknowledgment request bit is set * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From b7594148c73cb506487b5f00a6574beceea0e3a0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 11 Apr 2016 11:04:14 +0200 Subject: ieee802154: cleanups for ieee802154.h This patch removes some const from non-pointer types and fixes the function name for the ieee802154_is_valid_extended_unicast_addr comment. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Acked-by: Jukka Rissanen Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 56090f195339..9d84a924b747 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -270,17 +270,17 @@ static inline bool ieee802154_is_intra_pan(__le16 fc) * * @len: psdu len with (MHR + payload + MFR) */ -static inline bool ieee802154_is_valid_psdu_len(const u8 len) +static inline bool ieee802154_is_valid_psdu_len(u8 len) { return (len == IEEE802154_ACK_PSDU_LEN || (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** - * ieee802154_is_valid_psdu_len - check if extended addr is valid + * ieee802154_is_valid_extended_unicast_addr - check if extended addr is valid * @addr: extended addr to check */ -static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) +static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) { /* Bail out if the address is all zero, or if the group * address bit is set. -- cgit v1.2.3 From 118a5cf8ae236cdfa1eb4f21530843a8494722ef Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 11 Apr 2016 11:04:15 +0200 Subject: ieee802154: add short address helpers This patch introduce some short address handling functionality into ieee802154 headers. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Acked-by: Jukka Rissanen Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 29 +++++++++++++++++++++++++++++ include/net/mac802154.h | 10 ++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 9d84a924b747..acedbb68a5a3 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -47,6 +47,7 @@ #define IEEE802154_ADDR_SHORT_UNSPEC 0xfffe #define IEEE802154_EXTENDED_ADDR_LEN 8 +#define IEEE802154_SHORT_ADDR_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 @@ -289,6 +290,34 @@ static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) !(addr & cpu_to_le64(0x0100000000000000ULL))); } +/** + * ieee802154_is_broadcast_short_addr - check if short addr is broadcast + * @addr: short addr to check + */ +static inline bool ieee802154_is_broadcast_short_addr(__le16 addr) +{ + return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)); +} + +/** + * ieee802154_is_unspec_short_addr - check if short addr is unspecified + * @addr: short addr to check + */ +static inline bool ieee802154_is_unspec_short_addr(__le16 addr) +{ + return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)); +} + +/** + * ieee802154_is_valid_src_short_addr - check if source short address is valid + * @addr: short addr to check + */ +static inline bool ieee802154_is_valid_src_short_addr(__le16 addr) +{ + return !(ieee802154_is_broadcast_short_addr(addr) || + ieee802154_is_unspec_short_addr(addr)); +} + /** * ieee802154_random_extended_addr - generates a random extended address * @addr: extended addr pointer to place the random address diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 6cd7a70706a9..e465c8551ac3 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -287,6 +287,16 @@ static inline void ieee802154_le16_to_be16(void *be16_dst, const void *le16_src) put_unaligned_be16(get_unaligned_le16(le16_src), be16_dst); } +/** + * ieee802154_be16_to_le16 - copies and convert be16 to le16 + * @le16_dst: le16 destination pointer + * @be16_src: be16 source pointer + */ +static inline void ieee802154_be16_to_le16(void *le16_dst, const void *be16_src) +{ + put_unaligned_le16(get_unaligned_be16(be16_src), le16_dst); +} + /** * ieee802154_alloc_hw - Allocate a new hardware device * -- cgit v1.2.3 From f8e04d854506ddfeba9cb41b601972b28521f104 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:21 +0200 Subject: locking/rwsem: Get rid of __down_write_nested() This is no longer used anywhere and all callers (__down_write()) use 0 as a subclass. Ditch __down_write_nested() to make the code easier to follow. This shouldn't introduce any functional change. Signed-off-by: Michal Hocko Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-2-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/rwsem.h | 7 +------ arch/sh/include/asm/rwsem.h | 5 ----- arch/sparc/include/asm/rwsem.h | 7 +------ arch/x86/include/asm/rwsem.h | 7 +------ include/asm-generic/rwsem.h | 7 +------ include/linux/rwsem-spinlock.h | 1 - kernel/locking/rwsem-spinlock.c | 7 +------ 7 files changed, 5 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index fead491dfc28..555d23b6b6d1 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -90,7 +90,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { signed long old, new, tmp; @@ -108,11 +108,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h index edab57265293..a5104bebd1eb 100644 --- a/arch/sh/include/asm/rwsem.h +++ b/arch/sh/include/asm/rwsem.h @@ -114,11 +114,6 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) -{ - __down_write(sem); -} - /* * implement exchange and add functionality */ diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h index 069bf4d663a1..e5a0d575bc7f 100644 --- a/arch/sparc/include/asm/rwsem.h +++ b/arch/sparc/include/asm/rwsem.h @@ -45,7 +45,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { long tmp; @@ -55,11 +55,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index ceec86eb68e9..4a8292a0d6e1 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -99,7 +99,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { long tmp; asm volatile("# beginning down_write\n\t" @@ -116,11 +116,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) : "memory", "cc"); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index d6d5dc98d7da..b8d8a6cf4ca8 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -53,7 +53,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { long tmp; @@ -63,11 +63,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 561e8615528d..a733a5467e6c 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -34,7 +34,6 @@ struct rw_semaphore { extern void __down_read(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); -extern void __down_write_nested(struct rw_semaphore *sem, int subclass); extern int __down_write_trylock(struct rw_semaphore *sem); extern void __up_read(struct rw_semaphore *sem); extern void __up_write(struct rw_semaphore *sem); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 3a5048572065..bab26104a5d0 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -191,7 +191,7 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore */ -void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) +void __sched __down_write(struct rw_semaphore *sem) { struct rwsem_waiter waiter; struct task_struct *tsk; @@ -227,11 +227,6 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } -void __sched __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - /* * trylock for writing -- returns 1 if successful, 0 if contention */ -- cgit v1.2.3 From d47996082f52baa0ca8b48d26b3cbef5ede70a73 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:26 +0200 Subject: locking/rwsem: Introduce basis for down_write_killable() Introduce a generic implementation necessary for down_write_killable(). This is a trivial extension of the already existing down_write() call which can be interrupted by SIGKILL. This patch doesn't provide down_write_killable() yet because arches have to provide the necessary pieces before. rwsem_down_write_failed() which is a generic slow path for the write lock is extended to take a task state and renamed to __rwsem_down_write_failed_common(). The return value is either a valid semaphore pointer or ERR_PTR(-EINTR). rwsem_down_write_failed_killable() is exported as a new way to wait for the lock and be killable. For rwsem-spinlock implementation the current __down_write() it updated in a similar way as __rwsem_down_write_failed_common() except it doesn't need new exports just visible __down_write_killable(). Architectures which are not using the generic rwsem implementation are supposed to provide their __down_write_killable() implementation and use rwsem_down_write_failed_killable() for the slow path. Signed-off-by: Michal Hocko Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-7-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- include/asm-generic/rwsem.h | 12 ++++++++++++ include/linux/rwsem-spinlock.h | 1 + include/linux/rwsem.h | 2 ++ kernel/locking/rwsem-spinlock.c | 22 ++++++++++++++++++++-- kernel/locking/rwsem-xadd.c | 31 +++++++++++++++++++++++++------ 5 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index b8d8a6cf4ca8..3fc94a046bf5 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -63,6 +63,18 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_long_t *)&sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + return 0; +} + static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index a733a5467e6c..ae0528b834cd 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -34,6 +34,7 @@ struct rw_semaphore { extern void __down_read(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); +extern int __must_check __down_write_killable(struct rw_semaphore *sem); extern int __down_write_trylock(struct rw_semaphore *sem); extern void __up_read(struct rw_semaphore *sem); extern void __up_write(struct rw_semaphore *sem); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8f498cdde280..7d7ae029dac5 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include #endif @@ -43,6 +44,7 @@ struct rw_semaphore { extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index bab26104a5d0..1591f6b3539f 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -191,11 +191,12 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore */ -void __sched __down_write(struct rw_semaphore *sem) +int __sched __down_write_common(struct rw_semaphore *sem, int state) { struct rwsem_waiter waiter; struct task_struct *tsk; unsigned long flags; + int ret = 0; raw_spin_lock_irqsave(&sem->wait_lock, flags); @@ -215,16 +216,33 @@ void __sched __down_write(struct rw_semaphore *sem) */ if (sem->count == 0) break; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (signal_pending_state(state, current)) { + ret = -EINTR; + goto out; + } + set_task_state(tsk, state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); raw_spin_lock_irqsave(&sem->wait_lock, flags); } /* got the lock */ sem->count = -1; +out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; +} + +void __sched __down_write(struct rw_semaphore *sem) +{ + __down_write_common(sem, TASK_UNINTERRUPTIBLE); +} + +int __sched __down_write_killable(struct rw_semaphore *sem) +{ + return __down_write_common(sem, TASK_KILLABLE); } /* diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a4d4de05b2d1..df4dcb883b50 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -433,12 +433,13 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem) /* * Wait until we successfully acquire the write lock */ -__visible -struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) +static inline struct rw_semaphore * +__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) { long count; bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; + struct rw_semaphore *ret = sem; /* undo write bias from down_write operation, stop active locking */ count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); @@ -478,7 +479,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); /* wait until we successfully acquire the lock */ - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(state); while (true) { if (rwsem_try_write_lock(count, sem)) break; @@ -486,21 +487,39 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* Block until there are no active lockers. */ do { + if (signal_pending_state(state, current)) { + raw_spin_lock_irq(&sem->wait_lock); + ret = ERR_PTR(-EINTR); + goto out; + } schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(state); } while ((count = sem->count) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } +out: __set_current_state(TASK_RUNNING); - list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); - return sem; + return ret; +} + +__visible struct rw_semaphore * __sched +rwsem_down_write_failed(struct rw_semaphore *sem) +{ + return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(rwsem_down_write_failed); +__visible struct rw_semaphore * __sched +rwsem_down_write_failed_killable(struct rw_semaphore *sem) +{ + return __rwsem_down_write_failed_common(sem, TASK_KILLABLE); +} +EXPORT_SYMBOL(rwsem_down_write_failed_killable); + /* * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here -- cgit v1.2.3 From 2c923e94cd9c6acff3b22f0ae29cfe65e2658b40 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 11 Apr 2016 16:38:34 +0200 Subject: sched/clock: Make local_clock()/cpu_clock() inline The local_clock/cpu_clock functions were changed to prevent a double identical test with sched_clock_cpu() when HAVE_UNSTABLE_SCHED_CLOCK is set. That resulted in one line functions. As these functions are in all the cases one line functions and in the hot path, it is useful to specify them as static inline in order to give a strong hint to the compiler. After verification, it appears the compiler does not inline them without this hint. Change those functions to static inline. sched_clock_cpu() is called via the inlined local_clock()/cpu_clock() functions from sched.h. So any module code including sched.h will reference sched_clock_cpu(). Thus it must be exported with the EXPORT_SYMBOL_GPL macro. Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460385514-14700-2-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 32 ++++++++++++++++++++++++++++++-- kernel/sched/clock.c | 42 +----------------------------------------- 2 files changed, 31 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 52c4847b05e2..13c1c1d07270 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2303,8 +2303,6 @@ extern unsigned long long notrace sched_clock(void); /* * See the comment in kernel/sched/clock.c */ -extern u64 cpu_clock(int cpu); -extern u64 local_clock(void); extern u64 running_clock(void); extern u64 sched_clock_cpu(int cpu); @@ -2323,6 +2321,16 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} #else /* * Architectures can set this to 1 if they have specified @@ -2337,6 +2345,26 @@ extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 30c4b202f0ba..e85a725e5c34 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -318,6 +318,7 @@ u64 sched_clock_cpu(int cpu) return clock; } +EXPORT_SYMBOL_GPL(sched_clock_cpu); void sched_clock_tick(void) { @@ -363,33 +364,6 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); -/* - * As outlined at the top, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! # - * #################################################################### - */ -u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} - -/* - * Similar to cpu_clock() for the current cpu. Time will only be observed - * to be monotonic if care is taken to only compare timestampt taken on the - * same CPU. - * - * See cpu_clock(). - */ -u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} - #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ void sched_clock_init(void) @@ -404,22 +378,8 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } - -u64 cpu_clock(int cpu) -{ - return sched_clock(); -} - -u64 local_clock(void) -{ - return sched_clock(); -} - #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ -EXPORT_SYMBOL_GPL(cpu_clock); -EXPORT_SYMBOL_GPL(local_clock); - /* * Running clock - returns the time that has elapsed while a guest has been * running. -- cgit v1.2.3 From c422025c185fb2bb28df65b1bbed7953480c7f87 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 18 Mar 2016 16:24:41 +0200 Subject: dmaengine: dw: rename masters to reflect actual topology The source and destination masters are reflecting buses or their layers to where the different devices can be connected. The patch changes the master names to reflect which one is related to which independently on the transfer direction. The outcome of the change is that the memory data width is now always limited by a data width of the master which is dedicated to communicate to memory. The patch will not break anything since all current users have the same data width for all masters. Though it would be nice to revisit avr32 platforms to check what is the actual hardware topology in use there. It seems that it has one bus and two masters on it as stated by Table 8-2, that's why everything works independently on the master in use. The purpose of the sequential patch is to fix the driver for configuration of more than one bus. The change is done in the assumption that src_master and dst_master are reflecting a connection to the memory and peripheral correspondently on avr32 and otherwise on the rest. Acked-by: Hans-Christian Egtvedt Acked-by: Mark Brown Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/snps-dma.txt | 4 ++-- arch/avr32/mach-at32ap/at32ap700x.c | 16 ++++++++-------- drivers/ata/sata_dwc_460ex.c | 4 ++-- drivers/dma/dw/core.c | 19 +++++++++---------- drivers/dma/dw/platform.c | 12 ++++++------ drivers/dma/dw/regs.h | 4 ++-- drivers/spi/spi-pxa2xx-pci.c | 8 ++++---- drivers/tty/serial/8250/8250_pci.c | 8 ++++---- include/linux/platform_data/dma-dw.h | 8 ++++---- 9 files changed, 41 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index c261598164a7..c99c1ffac199 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -47,8 +47,8 @@ The four cells in order are: 1. A phandle pointing to the DMA controller 2. The DMA request line number -3. Source master for transfers on allocated channel -4. Destination master for transfers on allocated channel +3. Memory master for transfers on allocated channel +4. Peripheral master for transfers on allocated channel Example: diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index bf445aa48282..00d6dcc1d9b6 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1365,8 +1365,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) slave->dma_dev = &dw_dmac0_device.dev; slave->src_id = 0; slave->dst_id = 1; - slave->src_master = 1; - slave->dst_master = 0; + slave->m_master = 1; + slave->p_master = 0; data->dma_slave = slave; data->dma_filter = at32_mci_dma_filter; @@ -2061,16 +2061,16 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, if (flags & AC97C_CAPTURE) { rx_dws->dma_dev = &dw_dmac0_device.dev; rx_dws->src_id = 3; - rx_dws->src_master = 0; - rx_dws->dst_master = 1; + rx_dws->m_master = 0; + rx_dws->p_master = 1; } /* Check if DMA slave interface for playback should be configured. */ if (flags & AC97C_PLAYBACK) { tx_dws->dma_dev = &dw_dmac0_device.dev; tx_dws->dst_id = 4; - tx_dws->src_master = 0; - tx_dws->dst_master = 1; + tx_dws->m_master = 0; + tx_dws->p_master = 1; } if (platform_device_add_data(pdev, data, @@ -2141,8 +2141,8 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws->dma_dev = &dw_dmac0_device.dev; dws->dst_id = 2; - dws->src_master = 0; - dws->dst_master = 1; + dws->m_master = 0; + dws->p_master = 1; if (platform_device_add_data(pdev, data, sizeof(struct atmel_abdac_pdata))) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 902034991517..80bdcabc293f 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -201,8 +201,8 @@ static struct sata_dwc_host_priv host_pvt; static struct dw_dma_slave sata_dwc_dma_dws = { .src_id = 0, .dst_id = 0, - .src_master = 0, - .dst_master = 1, + .m_master = 1, + .p_master = 0, }; /* diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 97199b3c25a2..5bd7873a02c6 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -50,8 +50,8 @@ | DWC_CTLL_SRC_MSIZE(_smsize) \ | DWC_CTLL_LLP_D_EN \ | DWC_CTLL_LLP_S_EN \ - | DWC_CTLL_DMS(_dwc->dst_master) \ - | DWC_CTLL_SMS(_dwc->src_master)); \ + | DWC_CTLL_DMS(_dwc->p_master) \ + | DWC_CTLL_SMS(_dwc->m_master)); \ }) /* @@ -709,8 +709,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dwc->direction = DMA_MEM_TO_MEM; - data_width = min_t(unsigned int, dw->data_width[dwc->src_master], - dw->data_width[dwc->dst_master]); + data_width = dw->data_width[dwc->m_master]; src_width = dst_width = min_t(unsigned int, data_width, dwc_fast_ffs(src | dest | len)); @@ -802,7 +801,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) : DWC_CTLL_FC(DW_DMA_FC_D_M2P); - data_width = dw->data_width[dwc->src_master]; + data_width = dw->data_width[dwc->m_master]; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; @@ -859,7 +858,7 @@ slave_sg_todev_fill_desc: ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) : DWC_CTLL_FC(DW_DMA_FC_D_P2M); - data_width = dw->data_width[dwc->dst_master]; + data_width = dw->data_width[dwc->m_master]; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; @@ -937,8 +936,8 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) dwc->src_id = dws->src_id; dwc->dst_id = dws->dst_id; - dwc->src_master = dws->src_master; - dwc->dst_master = dws->dst_master; + dwc->m_master = dws->m_master; + dwc->p_master = dws->p_master; return true; } @@ -1227,8 +1226,8 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dwc->src_id = 0; dwc->dst_id = 0; - dwc->src_master = 0; - dwc->dst_master = 0; + dwc->m_master = 0; + dwc->p_master = 0; dwc->initialized = false; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 26edbe3a27ac..23616c57645c 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, slave.src_id = dma_spec->args[0]; slave.dst_id = dma_spec->args[0]; - slave.src_master = dma_spec->args[1]; - slave.dst_master = dma_spec->args[2]; + slave.m_master = dma_spec->args[1]; + slave.p_master = dma_spec->args[2]; if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || - slave.src_master >= dw->nr_masters || - slave.dst_master >= dw->nr_masters)) + slave.m_master >= dw->nr_masters || + slave.p_master >= dw->nr_masters)) return NULL; dma_cap_zero(cap); @@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) .dma_dev = dma_spec->dev, .src_id = dma_spec->slave_id, .dst_id = dma_spec->slave_id, - .src_master = 1, - .dst_master = 0, + .m_master = 0, + .p_master = 1, }; return dw_dma_filter(chan, &slave); diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 0a50c18d85b8..a63d62bbffe2 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -249,8 +249,8 @@ struct dw_dma_chan { /* custom slave configuration */ u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; /* configuration passed via .device_config */ struct dma_slave_config dma_sconfig; diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 520ed1dd5780..4fd7f9802f1b 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -144,16 +144,16 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, struct dw_dma_slave *slave = c->tx_param; slave->dma_dev = &dma_dev->dev; - slave->src_master = 1; - slave->dst_master = 0; + slave->m_master = 0; + slave->p_master = 1; } if (c->rx_param) { struct dw_dma_slave *slave = c->rx_param; slave->dma_dev = &dma_dev->dev; - slave->src_master = 1; - slave->dst_master = 0; + slave->m_master = 0; + slave->p_master = 1; } spi_pdata.dma_filter = lpss_dma_filter; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 98862aa5bb58..5eea74d7f9f4 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1454,13 +1454,13 @@ byt_serial_setup(struct serial_private *priv, return -EINVAL; } - rx_param->src_master = 1; - rx_param->dst_master = 0; + rx_param->m_master = 0; + rx_param->p_master = 1; dma->rxconf.src_maxburst = 16; - tx_param->src_master = 1; - tx_param->dst_master = 0; + tx_param->m_master = 0; + tx_param->p_master = 1; dma->txconf.dst_maxburst = 16; diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 03b6095d3b18..b881b978e486 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -21,15 +21,15 @@ * @dma_dev: required DMA master device * @src_id: src request line * @dst_id: dst request line - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. + * @m_master: memory master for transfers on allocated channel + * @p_master: peripheral master for transfers on allocated channel */ struct dw_dma_slave { struct device *dma_dev; u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; }; /** -- cgit v1.2.3 From d2d5437bdfdde20a75bdf59db1c1a77721613b22 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 13 Apr 2016 15:29:45 +0530 Subject: regulator: max8973: add support for junction thermal warning The driver MAX8973 supports the driver for Maxim PMIC MAX77621. MAX77621 supports the junction temp warning at 120 degC and 140 degC which is configurable. It generates alert signal when junction temperature crosses these threshold. MAX77621 does not support the continuous temp monitoring of junction temperature. It just report whether junction temperature crossed the threshold or not. Add support to - Configure junction temp warning threshold via DT property to generate alert when it crosses the threshold. - Add support to interrupt the host from this device when alert occurred. - read the junction temp via thermal framework. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 1 + drivers/regulator/max8973-regulator.c | 97 +++++++++++++++++++++++++++++ include/linux/regulator/max8973-regulator.h | 5 ++ 3 files changed, 103 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index c77dc08b1202..129359f775d0 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -409,6 +409,7 @@ config REGULATOR_MAX8952 config REGULATOR_MAX8973 tristate "Maxim MAX8973 voltage regulator " depends on I2C + depends on THERMAL && THERMAL_OF select REGMAP_I2C help The MAXIM MAX8973 high-efficiency. three phase, DC-DC step-down diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c index 5b75b7c2e3ea..08d2f13eca00 100644 --- a/drivers/regulator/max8973-regulator.c +++ b/drivers/regulator/max8973-regulator.c @@ -38,6 +38,9 @@ #include #include #include +#include +#include +#include /* Register definitions */ #define MAX8973_VOUT 0x0 @@ -74,6 +77,7 @@ #define MAX8973_WDTMR_ENABLE BIT(6) #define MAX8973_DISCH_ENBABLE BIT(5) #define MAX8973_FT_ENABLE BIT(4) +#define MAX77621_T_JUNCTION_120 BIT(7) #define MAX8973_CKKADV_TRIP_MASK 0xC #define MAX8973_CKKADV_TRIP_DISABLE 0xC @@ -93,6 +97,12 @@ #define MAX8973_VOLATGE_STEP 6250 #define MAX8973_BUCK_N_VOLTAGE 0x80 +#define MAX77621_CHIPID_TJINT_S BIT(0) + +#define MAX77621_NORMAL_OPERATING_TEMP 100000 +#define MAX77621_TJINT_WARNING_TEMP_120 120000 +#define MAX77621_TJINT_WARNING_TEMP_140 140000 + enum device_id { MAX8973, MAX77621 @@ -112,6 +122,9 @@ struct max8973_chip { int curr_gpio_val; struct regulator_ops ops; enum device_id id; + int junction_temp_warning; + int irq; + struct thermal_zone_device *tz_device; }; /* @@ -391,6 +404,10 @@ static int max8973_init_dcdc(struct max8973_chip *max, if (pdata->control_flags & MAX8973_CONTROL_FREQ_SHIFT_9PER_ENABLE) control1 |= MAX8973_FREQSHIFT_9PER; + if ((pdata->junction_temp_warning == MAX77621_TJINT_WARNING_TEMP_120) && + (max->id == MAX77621)) + control2 |= MAX77621_T_JUNCTION_120; + if (!(pdata->control_flags & MAX8973_CONTROL_PULL_DOWN_ENABLE)) control2 |= MAX8973_DISCH_ENBABLE; @@ -457,6 +474,79 @@ static int max8973_init_dcdc(struct max8973_chip *max, return ret; } +static int max8973_thermal_read_temp(void *data, int *temp) +{ + struct max8973_chip *mchip = data; + unsigned int val; + int ret; + + ret = regmap_read(mchip->regmap, MAX8973_CHIPID1, &val); + if (ret < 0) { + dev_err(mchip->dev, "Failed to read register CHIPID1, %d", ret); + return ret; + } + + /* +1 degC to trigger cool devive */ + if (val & MAX77621_CHIPID_TJINT_S) + *temp = mchip->junction_temp_warning + 1000; + else + *temp = MAX77621_NORMAL_OPERATING_TEMP; + + return 0; +} + +static irqreturn_t max8973_thermal_irq(int irq, void *data) +{ + struct max8973_chip *mchip = data; + + thermal_zone_device_update(mchip->tz_device); + + return IRQ_HANDLED; +} + +static const struct thermal_zone_of_device_ops max77621_tz_ops = { + .get_temp = max8973_thermal_read_temp, +}; + +static int max8973_thermal_init(struct max8973_chip *mchip) +{ + struct thermal_zone_device *tzd; + struct irq_data *irq_data; + unsigned long irq_flags = 0; + int ret; + + if (mchip->id != MAX77621) + return 0; + + tzd = devm_thermal_zone_of_sensor_register(mchip->dev, 0, mchip, + &max77621_tz_ops); + if (IS_ERR(tzd)) { + ret = PTR_ERR(tzd); + dev_err(mchip->dev, "Failed to register thermal sensor: %d\n", + ret); + return ret; + } + + if (mchip->irq <= 0) + return 0; + + irq_data = irq_get_irq_data(mchip->irq); + if (irq_data) + irq_flags = irqd_get_trigger_type(irq_data); + + ret = devm_request_threaded_irq(mchip->dev, mchip->irq, NULL, + max8973_thermal_irq, + IRQF_ONESHOT | IRQF_SHARED | irq_flags, + dev_name(mchip->dev), mchip); + if (ret < 0) { + dev_err(mchip->dev, "Failed to request irq %d, %d\n", + mchip->irq, ret); + return ret; + } + + return 0; +} + static const struct regmap_config max8973_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -521,6 +611,11 @@ static struct max8973_regulator_platform_data *max8973_parse_dt( pdata->control_flags |= MAX8973_CONTROL_CLKADV_TRIP_DISABLED; } + pdata->junction_temp_warning = MAX77621_TJINT_WARNING_TEMP_140; + ret = of_property_read_u32(np, "junction-warn-millicelsius", &pval); + if (!ret && (pval <= MAX77621_TJINT_WARNING_TEMP_120)) + pdata->junction_temp_warning = MAX77621_TJINT_WARNING_TEMP_120; + return pdata; } @@ -608,6 +703,7 @@ static int max8973_probe(struct i2c_client *client, max->enable_external_control = pdata->enable_ext_control; max->curr_gpio_val = pdata->dvs_def_state; max->curr_vout_reg = MAX8973_VOUT + pdata->dvs_def_state; + max->junction_temp_warning = pdata->junction_temp_warning; if (gpio_is_valid(max->enable_gpio)) max->enable_external_control = true; @@ -718,6 +814,7 @@ static int max8973_probe(struct i2c_client *client, return ret; } + max8973_thermal_init(max); return 0; } diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h index f6a8a16a0d4d..2fcb9980262a 100644 --- a/include/linux/regulator/max8973-regulator.h +++ b/include/linux/regulator/max8973-regulator.h @@ -54,6 +54,10 @@ * @reg_init_data: The regulator init data. * @control_flags: Control flags which are ORed value of above flags to * configure device. + * @junction_temp_warning: Junction temp in millicelcius on which warning need + * to be set. Thermal functionality is only supported on + * MAX77621. The threshold warning supported by MAX77621 + * are 120C and 140C. * @enable_ext_control: Enable the voltage enable/disable through external * control signal from EN input pin. If it is false then * voltage output will be enabled/disabled through EN bit of @@ -67,6 +71,7 @@ struct max8973_regulator_platform_data { struct regulator_init_data *reg_init_data; unsigned long control_flags; + unsigned long junction_temp_warning; bool enable_ext_control; int enable_gpio; int dvs_gpio; -- cgit v1.2.3 From c888a8f95ae5b1067855235b3b71c1ebccf504f5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Apr 2016 13:33:19 -0600 Subject: block: kill off q->flush_flags Now that we converted everything to the newer block write cache interface, kill off the queue flush_flags and queueable flush entries. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 ++- block/blk-flush.c | 11 ++++++----- block/blk-settings.c | 18 ++++++++++-------- drivers/block/xen-blkback/xenbus.c | 2 +- drivers/md/dm-table.c | 12 ++++++------ drivers/md/raid5-cache.c | 3 ++- drivers/target/target_core_iblock.c | 6 +++--- include/linux/blkdev.h | 5 ++--- 8 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index c50227796a26..2475b1c72773 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1964,7 +1964,8 @@ generic_make_request_checks(struct bio *bio) * drivers without flush support don't have to worry * about them. */ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); if (!nr_sectors) { err = 0; diff --git a/block/blk-flush.c b/block/blk-flush.c index 9c423e53324a..b1c91d229e5e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -95,17 +95,18 @@ enum { static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq); -static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) +static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq) { unsigned int policy = 0; if (blk_rq_sectors(rq)) policy |= REQ_FSEQ_DATA; - if (fflags & REQ_FLUSH) { + if (fflags & (1UL << QUEUE_FLAG_WC)) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) + if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && + (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } return policy; @@ -384,7 +385,7 @@ static void mq_flush_data_end_io(struct request *rq, int error) void blk_insert_flush(struct request *rq) { struct request_queue *q = rq->q; - unsigned int fflags = q->flush_flags; /* may change, cache */ + unsigned long fflags = q->queue_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); @@ -393,7 +394,7 @@ void blk_insert_flush(struct request *rq) * REQ_FLUSH and FUA for the driver. */ rq->cmd_flags &= ~REQ_FLUSH; - if (!(fflags & REQ_FUA)) + if (!(fflags & (1UL << QUEUE_FLAG_FUA))) rq->cmd_flags &= ~REQ_FUA; /* diff --git a/block/blk-settings.c b/block/blk-settings.c index 80d9327a214b..f679ae122843 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -822,7 +822,12 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment); void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { - q->flush_not_queueable = !queueable; + spin_lock_irq(q->queue_lock); + if (queueable) + clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + else + set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); @@ -837,16 +842,13 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) { spin_lock_irq(q->queue_lock); - if (wc) { + if (wc) queue_flag_set(QUEUE_FLAG_WC, q); - q->flush_flags = REQ_FLUSH; - } else + else queue_flag_clear(QUEUE_FLAG_WC, q); - if (fua) { - if (wc) - q->flush_flags |= REQ_FUA; + if (fua) queue_flag_set(QUEUE_FLAG_FUA, q); - } else + else queue_flag_clear(QUEUE_FLAG_FUA, q); spin_unlock_irq(q->queue_lock); } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 26aa080e243c..3355f1cdd4e5 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -477,7 +477,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->type |= VDISK_REMOVABLE; q = bdev_get_queue(bdev); - if (q && q->flush_flags) + if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags)) vbd->flush_support = true; if (q && blk_queue_secdiscard(q)) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4b1ffc0abe11..626a5ec04466 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1348,13 +1348,13 @@ static void dm_table_verify_integrity(struct dm_table *t) static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - unsigned flush = (*(unsigned *)data); + unsigned long flush = (unsigned long) data; struct request_queue *q = bdev_get_queue(dev->bdev); - return q && (q->flush_flags & flush); + return q && (q->queue_flags & flush); } -static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) +static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) { struct dm_target *ti; unsigned i = 0; @@ -1375,7 +1375,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) return true; if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_flush_capable, &flush)) + ti->type->iterate_devices(ti, device_flush_capable, (void *) flush)) return true; } @@ -1518,9 +1518,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - if (dm_table_supports_flush(t, REQ_FLUSH)) { + if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; - if (dm_table_supports_flush(t, REQ_FUA)) + if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA))) fua = true; } blk_queue_write_cache(q, wc, fua); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 9531f5f05b93..26f14970a858 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1188,6 +1188,7 @@ ioerr: int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { + struct request_queue *q = bdev_get_queue(rdev->bdev); struct r5l_log *log; if (PAGE_SIZE != 4096) @@ -1197,7 +1198,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) return -ENOMEM; log->rdev = rdev; - log->need_cache_flush = (rdev->bdev->bd_disk->queue->flush_flags != 0); + log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0; log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, sizeof(rdev->mddev->uuid)); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 026a758e5778..7c4efb4417b0 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -687,10 +687,10 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * Force writethrough using WRITE_FUA if a volatile write cache * is not enabled, or if initiator set the Force Unit Access bit. */ - if (q->flush_flags & REQ_FUA) { + if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) { if (cmd->se_cmd_flags & SCF_FUA) rw = WRITE_FUA; - else if (!(q->flush_flags & REQ_FLUSH)) + else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) rw = WRITE_FUA; else rw = WRITE; @@ -836,7 +836,7 @@ static bool iblock_get_write_cache(struct se_device *dev) struct block_device *bd = ib_dev->ibd_bd; struct request_queue *q = bdev_get_queue(bd); - return q->flush_flags & REQ_FLUSH; + return test_bit(QUEUE_FLAG_WC, &q->queue_flags); } static const struct target_backend_ops iblock_ops = { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3f232fa505d..57c085917da6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -433,8 +433,6 @@ struct request_queue { /* * for flush operations */ - unsigned int flush_flags; - unsigned int flush_not_queueable:1; struct blk_flush_queue *fq; struct list_head requeue_list; @@ -493,6 +491,7 @@ struct request_queue { #define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 23 /* Write back caching */ #define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ +#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -1365,7 +1364,7 @@ static inline unsigned int block_size(struct block_device *bdev) static inline bool queue_flush_queueable(struct request_queue *q) { - return !q->flush_not_queueable; + return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); } typedef struct {struct page *v;} Sector; -- cgit v1.2.3 From 7d35812c3214afa5b37a675113555259cfd67b98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:23 +0200 Subject: netfilter: x_tables: add and use xt_check_entry_offsets Currently arp/ip and ip6tables each implement a short helper to check that the target offset is large enough to hold one xt_entry_target struct and that t->u.target_size fits within the current rule. Unfortunately these checks are not sufficient. To avoid adding new tests to all of ip/ip6/arptables move the current checks into a helper, then extend this helper in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++++ net/ipv4/netfilter/arp_tables.c | 11 +---------- net/ipv4/netfilter/ip_tables.c | 12 +----------- net/ipv6/netfilter/ip6_tables.c | 12 +----------- net/netfilter/x_tables.c | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 80a305b85323..0de0862897a4 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -242,6 +242,10 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ec37f7c3a033..74668c1d3243 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -496,19 +496,10 @@ next: static inline int check_entry(const struct arpt_entry *e) { - const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static inline int check_target(struct arpt_entry *e, const char *name) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 503038ea7735..71c204d4ca5f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -590,20 +590,10 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_entry(const struct ipt_entry *e) { - const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static int diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 126f2a0f006a..24ae7f458b3b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -602,20 +602,10 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_entry(const struct ip6t_entry *e) { - const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 582c9cfd6567..1f44bfa8dd94 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -541,6 +541,40 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, EXPORT_SYMBOL_GPL(xt_compat_match_to_user); #endif /* CONFIG_COMPAT */ +/** + * xt_check_entry_offsets - validate arp/ip/ip6t_entry + * + * @base: pointer to arp/ip/ip6t_entry + * @target_offset: the arp/ip/ip6_t->target_offset + * @next_offset: the arp/ip/ip6_t->next_offset + * + * validates that target_offset and next_offset are sane. + * + * The arp/ip/ip6t_entry structure @base must have passed following tests: + * - it must point to a valid memory location + * - base to base + next_offset must be accessible, i.e. not exceed allocated + * length. + * + * Return: 0 on success, negative errno on failure. + */ +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset) +{ + const struct xt_entry_target *t; + const char *e = base; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { -- cgit v1.2.3 From fc1221b3a163d1386d1052184202d5dc50d302d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:26 +0200 Subject: netfilter: x_tables: add compat version of xt_check_entry_offsets 32bit rulesets have different layout and alignment requirements, so once more integrity checks get added to xt_check_entry_offsets it will reject well-formed 32bit rulesets. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 +++ net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/netfilter/x_tables.c | 22 ++++++++++++++++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 0de0862897a4..08de48bbe92e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -494,6 +494,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 24ad92a60b7a..ab8952a49bfa 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1254,7 +1254,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, e->target_offset, + e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cdf18502a047..7d24c872723f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1513,7 +1513,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, + e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e3783116ed48..73eee7b5fd60 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1525,7 +1525,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, + e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ec1b7183fff9..fa206ceb269f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -539,6 +539,27 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset) +{ + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ /** @@ -549,6 +570,7 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user); * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane. + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location -- cgit v1.2.3 From ce683e5f9d045e5d67d1312a42b359cb2ab2a13c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:28 +0200 Subject: netfilter: x_tables: check for bogus target offset We're currently asserting that targetoff + targetsize <= nextoff. Extend it to also check that targetoff is >= sizeof(xt_entry). Since this is generic code, add an argument pointing to the start of the match/target, we can then derive the base structure size from the delta. We also need the e->elems pointer in a followup change to validate matches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++-- net/ipv4/netfilter/arp_tables.c | 5 +++-- net/ipv4/netfilter/ip_tables.c | 5 +++-- net/ipv6/netfilter/ip6_tables.c | 5 +++-- net/netfilter/x_tables.c | 17 +++++++++++++++-- 5 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 08de48bbe92e..30cfb1e943fb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -242,7 +242,7 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); -int xt_check_entry_offsets(const void *base, +int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); @@ -494,7 +494,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ab8952a49bfa..95ed4e454c60 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -592,7 +592,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1254,7 +1255,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, e->target_offset, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7d24c872723f..baab033d74e0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -754,7 +754,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1513,7 +1514,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73eee7b5fd60..6957627c7931 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -766,7 +766,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1525,7 +1526,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 1cb7a271c024..e2a6f2a9051b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -546,14 +546,17 @@ struct compat_xt_standard_target { compat_uint_t verdict; }; -/* see xt_check_entry_offsets */ -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) { + long size_of_base_struct = elems - (const char *)base; const struct compat_xt_entry_target *t; const char *e = base; + if (target_offset < size_of_base_struct) + return -EINVAL; + if (target_offset + sizeof(*t) > next_offset) return -EINVAL; @@ -577,12 +580,16 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * xt_check_entry_offsets - validate arp/ip/ip6t_entry * * @base: pointer to arp/ip/ip6t_entry + * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems * @target_offset: the arp/ip/ip6_t->target_offset * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane. * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * + * This function does not validate the targets or matches themselves, it + * only tests that all the offsets and sizes are correct. + * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location * - base to base + next_offset must be accessible, i.e. not exceed allocated @@ -591,12 +598,18 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * Return: 0 on success, negative errno on failure. */ int xt_check_entry_offsets(const void *base, + const char *elems, unsigned int target_offset, unsigned int next_offset) { + long size_of_base_struct = elems - (const char *)base; const struct xt_entry_target *t; const char *e = base; + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + if (target_offset + sizeof(*t) > next_offset) return -EINVAL; -- cgit v1.2.3 From 0188346f21e6546498c2a0f84888797ad4063fc5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:33 +0200 Subject: netfilter: x_tables: xt_compat_match_from_user doesn't need a retval Always returned 0. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/arp_tables.c | 17 +++++------------ net/ipv4/netfilter/ip_tables.c | 26 +++++++++----------------- net/ipv6/netfilter/ip6_tables.c | 27 +++++++++------------------ net/netfilter/x_tables.c | 5 ++--- 5 files changed, 26 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 30cfb1e943fb..e2da9b90f1b8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -484,7 +484,7 @@ void xt_compat_init_offsets(u_int8_t af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1d1386dc159b..be514c676fad 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1310,7 +1310,7 @@ out: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1319,9 +1319,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1342,7 +1341,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int translate_compat_table(struct xt_table_info **pinfo, @@ -1421,16 +1419,11 @@ static int translate_compat_table(struct xt_table_info **pinfo, entry1 = newinfo->entries; pos = entry1; size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d70418604503..5c20eef980c1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1568,7 +1568,7 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1577,10 +1577,9 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1589,11 +1588,9 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; @@ -1606,7 +1603,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int @@ -1729,16 +1725,12 @@ translate_compat_table(struct net *net, entry1 = newinfo->entries; pos = entry1; size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 8d082c557771..620d54c1c119 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1580,7 +1580,7 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1588,10 +1588,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1600,11 +1599,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1616,7 +1613,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int compat_check_entry(struct ip6t_entry *e, struct net *net, @@ -1737,17 +1733,12 @@ translate_compat_table(struct net *net, } entry1 = newinfo->entries; pos = entry1; - size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f9aa9715c32e..7e7173b68344 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -526,8 +526,8 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; @@ -549,7 +549,6 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); -- cgit v1.2.3 From d7591f0c41ce3e67600a982bab6989ef0f07b3ce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 15:37:59 +0200 Subject: netfilter: x_tables: introduce and use xt_copy_counters_from_user The three variants use same copy&pasted code, condense this into a helper and use that. Make sure info.name is 0-terminated. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 ++ net/ipv4/netfilter/arp_tables.c | 48 +++---------------------- net/ipv4/netfilter/ip_tables.c | 48 +++---------------------- net/ipv6/netfilter/ip6_tables.c | 49 +++---------------------- net/netfilter/x_tables.c | 74 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 130 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e2da9b90f1b8..4dd9306c9d56 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,6 +251,9 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat); + struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8cefb7a2606b..60f5161abcb4 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1123,55 +1123,17 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1179,7 +1141,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9340ce0a7549..735d1ee8c1ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1307,55 +1307,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1363,7 +1325,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index aa010856a255..73e606c719ef 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1319,55 +1319,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1375,7 +1336,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9ec23ffa43b4..c69c892231d7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -752,6 +752,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel + * + * Copies counter meta data from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer. + */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { -- cgit v1.2.3 From f9a7cbbf18f1640907d6ca345b8337e4b50ea56f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 Apr 2016 17:51:54 +0200 Subject: net: force inlining of netif_tx_start/stop_queue, sock_hold, __sock_put Sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 Arguably, gcc should do better, but gcc people aren't willing to invest time into it, asking to use __always_inline instead. With this .config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os, the following functions get deinlined many times. netif_tx_stop_queue: 207 copies, 590 calls: 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi) 5d pop %rbp c3 retq netif_tx_start_queue: 47 copies, 111 calls 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 a7 e0 01 00 00 fe lock andb $0xfe,0x1e0(%rdi) 5d pop %rbp c3 retq sock_hold: 39 copies, 124 calls 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 ff 87 80 00 00 00 lock incl 0x80(%rdi) 5d pop %rbp c3 retq __sock_put: 6 copies, 13 calls 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 ff 8f 80 00 00 00 lock decl 0x80(%rdi) 5d pop %rbp c3 retq This patch fixes this via s/inline/__always_inline/. Code size decrease after the patch is ~2.5k: text data bss dec hex filename 56719876 56364551 36196352 149280779 8e5d80b vmlinux_before 56717440 56364551 36196352 149278343 8e5ce87 vmlinux Signed-off-by: Denys Vlasenko CC: David S. Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- include/net/sock.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 166402ae3324..e906c6570b38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2787,7 +2787,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev) netif_schedule_queue(netdev_get_tx_queue(dev, i)); } -static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) +static __always_inline void netif_tx_start_queue(struct netdev_queue *dev_queue) { clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2837,7 +2837,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) } } -static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) +static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } diff --git a/include/net/sock.h b/include/net/sock.h index baba58770ac5..d997ec13a643 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -569,7 +569,7 @@ static inline bool __sk_del_node_init(struct sock *sk) modifications. */ -static inline void sock_hold(struct sock *sk) +static __always_inline void sock_hold(struct sock *sk) { atomic_inc(&sk->sk_refcnt); } @@ -577,7 +577,7 @@ static inline void sock_hold(struct sock *sk) /* Ungrab socket in the context, which assumes that socket refcnt cannot hit zero, f.e. it is true in context of any socketcall. */ -static inline void __sock_put(struct sock *sk) +static __always_inline void __sock_put(struct sock *sk) { atomic_dec(&sk->sk_refcnt); } -- cgit v1.2.3 From 743b03a83297690f0bd38c452a3bbb47d2be300a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 9 Apr 2016 11:29:58 -0700 Subject: net: remove netdevice gso_min_segs After introduction of ndo_features_check(), we believe that very specific checks for rare features should not be done in core networking stack. No driver uses gso_min_segs yet, so we revert this feature and save few instructions per tx packet in fast path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +--- net/core/dev.c | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e906c6570b38..9884fe9a6552 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1586,8 +1586,6 @@ enum netdev_priv_flags { * @gso_max_size: Maximum size of generic segmentation offload * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO - * @gso_min_segs: Minimum number of segments that can be passed to the - * NIC for GSO * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -1858,7 +1856,7 @@ struct net_device { unsigned int gso_max_size; #define GSO_MAX_SEGS 65535 u16 gso_max_segs; - u16 gso_min_segs; + #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif diff --git a/net/core/dev.c b/net/core/dev.c index d51343a821ed..09fb1ace9dc8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2831,7 +2831,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) + if (gso_segs > dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; /* If encapsulation offload request, verify we are testing @@ -7429,7 +7429,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; - dev->gso_min_segs = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); -- cgit v1.2.3 From 95114344ea78649b1797d00ab6e88147bef66fa4 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Sun, 10 Apr 2016 12:42:59 +0300 Subject: qed*: remove version dependency Inbox drivers don't need versioning scheme in order to guarantee compatibility, as both qed and qede are compiled from same codebase. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 2 -- drivers/net/ethernet/qlogic/qed/qed_l2.c | 8 +------- drivers/net/ethernet/qlogic/qed/qed_main.c | 11 ----------- drivers/net/ethernet/qlogic/qede/qede.h | 2 -- drivers/net/ethernet/qlogic/qede/qede_main.c | 11 +---------- include/linux/qed/qed_eth_if.h | 2 +- include/linux/qed/qed_if.h | 9 --------- 7 files changed, 3 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index fcb8e9ba51d9..a3ee9df16dfe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -507,6 +507,4 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn, int qed_slowpath_irq_req(struct qed_hwfn *hwfn); -#define QED_ETH_INTERFACE_VERSION 300 - #endif /* _QED_H */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 3f35c6ca9252..e848d5a1f7f6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2043,14 +2043,8 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .get_vport_stats = &qed_get_vport_stats, }; -const struct qed_eth_ops *qed_get_eth_ops(u32 version) +const struct qed_eth_ops *qed_get_eth_ops(void) { - if (version != QED_ETH_INTERFACE_VERSION) { - pr_notice("Cannot supply ethtool operations [%08x != %08x]\n", - version, QED_ETH_INTERFACE_VERSION); - return NULL; - } - return &qed_eth_ops_pass; } EXPORT_SYMBOL(qed_get_eth_ops); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 26d40db07ddd..c31d485f72d6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1172,14 +1172,3 @@ const struct qed_common_ops qed_common_ops_pass = { .chain_free = &qed_chain_free, .set_led = &qed_set_led, }; - -u32 qed_get_protocol_version(enum qed_protocol protocol) -{ - switch (protocol) { - case QED_PROTOCOL_ETH: - return QED_ETH_INTERFACE_VERSION; - default: - return 0; - } -} -EXPORT_SYMBOL(qed_get_protocol_version); diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index d023251544d9..e0a696a57d4d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -32,8 +32,6 @@ __stringify(QEDE_REVISION_VERSION) "." \ __stringify(QEDE_ENGINEERING_VERSION) -#define QEDE_ETH_INTERFACE_VERSION 300 - #define DRV_MODULE_SYM qede struct qede_stats { diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 518af329502d..a55d93eb41fa 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -141,19 +141,10 @@ static int __init qede_init(void) { int ret; - u32 qed_ver; pr_notice("qede_init: %s\n", version); - qed_ver = qed_get_protocol_version(QED_PROTOCOL_ETH); - if (qed_ver != QEDE_ETH_INTERFACE_VERSION) { - pr_notice("Version mismatch [%08x != %08x]\n", - qed_ver, - QEDE_ETH_INTERFACE_VERSION); - return -EINVAL; - } - - qed_ops = qed_get_eth_ops(QEDE_ETH_INTERFACE_VERSION); + qed_ops = qed_get_eth_ops(); if (!qed_ops) { pr_notice("Failed to get qed ethtool operations\n"); return -EINVAL; diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index e1d69834a11f..e00c8dbfc324 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -167,7 +167,7 @@ struct qed_eth_ops { struct qed_eth_stats *stats); }; -const struct qed_eth_ops *qed_get_eth_ops(u32 version); +const struct qed_eth_ops *qed_get_eth_ops(void); void qed_put_eth_ops(void); #endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 1f7599c77cd4..b007011e1c82 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -271,15 +271,6 @@ struct qed_common_ops { enum qed_led_mode mode); }; -/** - * @brief qed_get_protocol_version - * - * @param protocol - * - * @return version supported by qed for given protocol driver - */ -u32 qed_get_protocol_version(enum qed_protocol protocol); - #define MASK_FIELD(_name, _value) \ ((_value) &= (_name ## _MASK)) -- cgit v1.2.3 From 8c5ebd0c792a097fcc0e526debbe0887ee378ae5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 10 Apr 2016 12:43:00 +0300 Subject: qed: add Rx flow hash/indirection support. Adds the required API for passing RSS-related configuration from qede. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 17 +---------------- include/linux/qed/qed_eth_if.h | 1 + include/linux/qed/qed_if.h | 11 +++++++++++ 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e848d5a1f7f6..5005497ee23e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -35,19 +35,6 @@ #include "qed_reg_addr.h" #include "qed_sp.h" -enum qed_rss_caps { - QED_RSS_IPV4 = 0x1, - QED_RSS_IPV6 = 0x2, - QED_RSS_IPV4_TCP = 0x4, - QED_RSS_IPV6_TCP = 0x8, - QED_RSS_IPV4_UDP = 0x10, - QED_RSS_IPV6_UDP = 0x20, -}; - -/* Should be the same as ETH_RSS_IND_TABLE_ENTRIES_NUM */ -#define QED_RSS_IND_TABLE_SIZE 128 -#define QED_RSS_KEY_SIZE 10 /* size in 32b chunks */ - struct qed_rss_params { u8 update_rss_config; u8 rss_enable; @@ -1744,9 +1731,7 @@ static int qed_update_vport(struct qed_dev *cdev, sp_rss_params.update_rss_capabilities = 1; sp_rss_params.update_rss_ind_table = 1; sp_rss_params.update_rss_key = 1; - sp_rss_params.rss_caps = QED_RSS_IPV4 | - QED_RSS_IPV6 | - QED_RSS_IPV4_TCP | QED_RSS_IPV6_TCP; + sp_rss_params.rss_caps = params->rss_params.rss_caps; sp_rss_params.rss_table_size_log = 7; /* 2^7 = 128 */ memcpy(sp_rss_params.rss_ind_table, params->rss_params.rss_ind_table, diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index e00c8dbfc324..795c9902e02f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -27,6 +27,7 @@ struct qed_dev_eth_info { struct qed_update_vport_rss_params { u16 rss_ind_table[128]; u32 rss_key[10]; + u8 rss_caps; }; struct qed_update_vport_params { diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b007011e1c82..67e8c206b2c1 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -515,4 +515,15 @@ static inline void internal_ram_wr(void __iomem *addr, __internal_ram_wr(NULL, addr, size, data); } +enum qed_rss_caps { + QED_RSS_IPV4 = 0x1, + QED_RSS_IPV6 = 0x2, + QED_RSS_IPV4_TCP = 0x4, + QED_RSS_IPV6_TCP = 0x8, + QED_RSS_IPV4_UDP = 0x10, + QED_RSS_IPV6_UDP = 0x20, +}; + +#define QED_RSS_IND_TABLE_SIZE 128 +#define QED_RSS_KEY_SIZE 10 /* size in 32b chunks */ #endif -- cgit v1.2.3 From 04cf31a759ef575f750a63777cee95500e410994 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Mar 2016 22:04:01 +1100 Subject: ftrace: Make ftrace_location_range() global In order to support live patching on powerpc we would like to call ftrace_location_range(), so make it global. Signed-off-by: Torsten Duwe Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman --- include/linux/ftrace.h | 1 + kernel/trace/ftrace.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 81de7123959d..3481a8e405f9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -455,6 +455,7 @@ int ftrace_update_record(struct dyn_ftrace *rec, int enable); int ftrace_test_record(struct dyn_ftrace *rec, int enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); +unsigned long ftrace_location_range(unsigned long start, unsigned long end); unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec); unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eca592f977b2..e1b3f2312db0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1533,7 +1533,19 @@ static int ftrace_cmp_recs(const void *a, const void *b) return 0; } -static unsigned long ftrace_location_range(unsigned long start, unsigned long end) +/** + * ftrace_location_range - return the first address of a traced location + * if it touches the given ip range + * @start: start of range to search. + * @end: end of range to search (inclusive). @end points to the last byte + * to check. + * + * Returns rec->ip if the related ftrace location is a least partly within + * the given address range. That is, the first address of the instruction + * that is either a NOP or call to the function tracer. It checks the ftrace + * internal tables to determine if the address belongs or not. + */ +unsigned long ftrace_location_range(unsigned long start, unsigned long end) { struct ftrace_page *pg; struct dyn_ftrace *rec; -- cgit v1.2.3 From d17322feecf80152303426dd724577025d1fbd7e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 9 Apr 2016 10:52:26 +0200 Subject: gpio: sx150x: move platform data into driver The sx150x has some platform data definition in but this file is only included from the driver in the whole kernel so move its contents into the driver. Cc: Wei Chen Cc: Peter Rosin Acked-by: Wolfram Sang Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sx150x.c | 60 ++++++++++++++++++++++++++++++++- include/linux/i2c/sx150x.h | 82 ---------------------------------------------- 2 files changed, 59 insertions(+), 83 deletions(-) delete mode 100644 include/linux/i2c/sx150x.h (limited to 'include/linux') diff --git a/drivers/gpio/gpio-sx150x.c b/drivers/gpio/gpio-sx150x.c index d57e8ad0bfd2..d4501d5f8b8e 100644 --- a/drivers/gpio/gpio-sx150x.c +++ b/drivers/gpio/gpio-sx150x.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -85,6 +84,65 @@ struct sx150x_device_data { } pri; }; +/** + * struct sx150x_platform_data - config data for SX150x driver + * @gpio_base: The index number of the first GPIO assigned to this + * GPIO expander. The expander will create a block of + * consecutively numbered gpios beginning at the given base, + * with the size of the block depending on the model of the + * expander chip. + * @oscio_is_gpo: If set to true, the driver will configure OSCIO as a GPO + * instead of as an oscillator, increasing the size of the + * GP(I)O pool created by this expander by one. The + * output-only GPO pin will be added at the end of the block. + * @io_pullup_ena: A bit-mask which enables or disables the pull-up resistor + * for each IO line in the expander. Setting the bit at + * position n will enable the pull-up for the IO at + * the corresponding offset. For chips with fewer than + * 16 IO pins, high-end bits are ignored. + * @io_pulldn_ena: A bit-mask which enables-or disables the pull-down + * resistor for each IO line in the expander. Setting the + * bit at position n will enable the pull-down for the IO at + * the corresponding offset. For chips with fewer than + * 16 IO pins, high-end bits are ignored. + * @io_open_drain_ena: A bit-mask which enables-or disables open-drain + * operation for each IO line in the expander. Setting the + * bit at position n enables open-drain operation for + * the IO at the corresponding offset. Clearing the bit + * enables regular push-pull operation for that IO. + * For chips with fewer than 16 IO pins, high-end bits + * are ignored. + * @io_polarity: A bit-mask which enables polarity inversion for each IO line + * in the expander. Setting the bit at position n inverts + * the polarity of that IO line, while clearing it results + * in normal polarity. For chips with fewer than 16 IO pins, + * high-end bits are ignored. + * @irq_summary: The 'summary IRQ' line to which the GPIO expander's INT line + * is connected, via which it reports interrupt events + * across all GPIO lines. This must be a real, + * pre-existing IRQ line. + * Setting this value < 0 disables the irq_chip functionality + * of the driver. + * @irq_base: The first 'virtual IRQ' line at which our block of GPIO-based + * IRQ lines will appear. Similarly to gpio_base, the expander + * will create a block of irqs beginning at this number. + * This value is ignored if irq_summary is < 0. + * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. + */ +struct sx150x_platform_data { + unsigned gpio_base; + bool oscio_is_gpo; + u16 io_pullup_ena; + u16 io_pulldn_ena; + u16 io_open_drain_ena; + u16 io_polarity; + int irq_summary; + unsigned irq_base; + bool reset_during_probe; +}; + struct sx150x_chip { struct gpio_chip gpio_chip; struct i2c_client *client; diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h deleted file mode 100644 index 52baa79d69a7..000000000000 --- a/include/linux/i2c/sx150x.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Driver for the Semtech SX150x I2C GPIO Expanders - * - * Copyright (c) 2010, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ -#ifndef __LINUX_I2C_SX150X_H -#define __LINUX_I2C_SX150X_H - -/** - * struct sx150x_platform_data - config data for SX150x driver - * @gpio_base: The index number of the first GPIO assigned to this - * GPIO expander. The expander will create a block of - * consecutively numbered gpios beginning at the given base, - * with the size of the block depending on the model of the - * expander chip. - * @oscio_is_gpo: If set to true, the driver will configure OSCIO as a GPO - * instead of as an oscillator, increasing the size of the - * GP(I)O pool created by this expander by one. The - * output-only GPO pin will be added at the end of the block. - * @io_pullup_ena: A bit-mask which enables or disables the pull-up resistor - * for each IO line in the expander. Setting the bit at - * position n will enable the pull-up for the IO at - * the corresponding offset. For chips with fewer than - * 16 IO pins, high-end bits are ignored. - * @io_pulldn_ena: A bit-mask which enables-or disables the pull-down - * resistor for each IO line in the expander. Setting the - * bit at position n will enable the pull-down for the IO at - * the corresponding offset. For chips with fewer than - * 16 IO pins, high-end bits are ignored. - * @io_open_drain_ena: A bit-mask which enables-or disables open-drain - * operation for each IO line in the expander. Setting the - * bit at position n enables open-drain operation for - * the IO at the corresponding offset. Clearing the bit - * enables regular push-pull operation for that IO. - * For chips with fewer than 16 IO pins, high-end bits - * are ignored. - * @io_polarity: A bit-mask which enables polarity inversion for each IO line - * in the expander. Setting the bit at position n inverts - * the polarity of that IO line, while clearing it results - * in normal polarity. For chips with fewer than 16 IO pins, - * high-end bits are ignored. - * @irq_summary: The 'summary IRQ' line to which the GPIO expander's INT line - * is connected, via which it reports interrupt events - * across all GPIO lines. This must be a real, - * pre-existing IRQ line. - * Setting this value < 0 disables the irq_chip functionality - * of the driver. - * @irq_base: The first 'virtual IRQ' line at which our block of GPIO-based - * IRQ lines will appear. Similarly to gpio_base, the expander - * will create a block of irqs beginning at this number. - * This value is ignored if irq_summary is < 0. - * @reset_during_probe: If set to true, the driver will trigger a full - * reset of the chip at the beginning of the probe - * in order to place it in a known state. - */ -struct sx150x_platform_data { - unsigned gpio_base; - bool oscio_is_gpo; - u16 io_pullup_ena; - u16 io_pulldn_ena; - u16 io_open_drain_ena; - u16 io_polarity; - int irq_summary; - unsigned irq_base; - bool reset_during_probe; -}; - -#endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.3 From cbc53e08a793b073e79f42ca33f1f3568703540d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sun, 10 Apr 2016 21:44:51 -0400 Subject: GSO: Add GSO type for fixed IPv4 ID This patch adds support for TSO using IPv4 headers with a fixed IP ID field. This is meant to allow us to do a lossless GRO in the case of TCP flows that use a fixed IP ID such as those that convert IPv6 header to IPv4 headers. In addition I am adding a feature that for now I am referring to TSO with IP ID mangling. Basically when this flag is enabled the device has the option to either output the flow with incrementing IP IDs or with a fixed IP ID regardless of what the original IP ID ordering was. This is useful in cases where the DF bit is set and we do not care if the original IP ID value is maintained. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +++ include/linux/netdevice.h | 1 + include/linux/skbuff.h | 20 +++++++++++--------- net/core/dev.c | 34 +++++++++++++++++++++++++++++----- net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 19 +++++++++++-------- net/ipv4/gre_offload.c | 1 + net/ipv4/tcp_offload.c | 4 +++- net/ipv6/ip6_offload.c | 3 ++- net/mpls/mpls_gso.c | 1 + 10 files changed, 63 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a734bf43d190..7cf272a4b5c8 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -39,6 +39,7 @@ enum { NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ + NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ @@ -120,6 +121,7 @@ enum { #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) +#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) @@ -147,6 +149,7 @@ enum { /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ + NETIF_F_TSO_MANGLEID | \ NETIF_F_TSO6 | NETIF_F_UFO) /* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9884fe9a6552..8e372d01b3c1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3992,6 +3992,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 007381270ff8..5fba16658f9d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -465,23 +465,25 @@ enum { /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 3, - SKB_GSO_TCPV6 = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 4, - SKB_GSO_FCOE = 1 << 5, + SKB_GSO_TCPV6 = 1 << 5, - SKB_GSO_GRE = 1 << 6, + SKB_GSO_FCOE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 7, + SKB_GSO_GRE = 1 << 7, - SKB_GSO_IPIP = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 8, - SKB_GSO_SIT = 1 << 9, + SKB_GSO_IPIP = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 10, + SKB_GSO_SIT = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 11, - SKB_GSO_TUNNEL_REMCSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, }; #if BITS_PER_LONG > 32 diff --git a/net/core/dev.c b/net/core/dev.c index 09fb1ace9dc8..e896b1953ab6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2825,14 +2825,36 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb, return vlan_features_check(skb, features); } +static netdev_features_t gso_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + u16 gso_segs = skb_shinfo(skb)->gso_segs; + + if (gso_segs > dev->gso_max_segs) + return features & ~NETIF_F_GSO_MASK; + + /* Make sure to clear the IPv4 ID mangling feature if + * the IPv4 header has the potential to be fragmented. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + struct iphdr *iph = skb->encapsulation ? + inner_ip_hdr(skb) : ip_hdr(skb); + + if (!(iph->frag_off & htons(IP_DF))) + features &= ~NETIF_F_TSO_MANGLEID; + } + + return features; +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; - u16 gso_segs = skb_shinfo(skb)->gso_segs; - if (gso_segs > dev->gso_max_segs) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) + features = gso_features_check(skb, dev, features); /* If encapsulation offload request, verify we are testing * hardware encapsulation features instead of standard @@ -6976,9 +6998,11 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - if (!(dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) dev->hw_features |= NETIF_F_NOCACHE_COPY; - } + + if (dev->hw_features & NETIF_F_TSO) + dev->hw_features |= NETIF_F_TSO_MANGLEID; /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6a7f99661c2f..9494c41cc77c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -79,6 +79,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8217cd22f921..5bbea9a0ce96 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1195,10 +1195,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { + bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; - bool udpfrag, encap; struct iphdr *iph; int proto; int nhoff; @@ -1217,6 +1217,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TUNNEL_REMCSUM | 0))) goto out; @@ -1248,11 +1249,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) - udpfrag = proto == IPPROTO_UDP && encap; - else - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + if (!skb->encapsulation || encap) { + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); + + /* fixed ID is invalid if DF bit is not set */ + if (fixedid && !(iph->frag_off & htons(IP_DF))) + goto out; + } ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) @@ -1265,12 +1269,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (udpfrag) { - iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; - } else { + } else if (!fixedid) { iph->id = htons(id++); } iph->tot_len = htons(skb->len - nhoff); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6a5bd4317866..6376b0cdf693 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -32,6 +32,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 773083b7f1e9..08dd25d835af 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -89,6 +89,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, ~(SKB_GSO_TCPV4 | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | @@ -98,7 +99,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + !(type & (SKB_GSO_TCPV4 | + SKB_GSO_TCPV6)))) goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 204af2219471..b3a779393d71 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -73,6 +73,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | + SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | @@ -80,7 +82,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 0183b32da942..bbcf60465e5c 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -31,6 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_ECN))) goto out; -- cgit v1.2.3 From 1530545ed64b42e87acb43c0c16401bd1ebae6bf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sun, 10 Apr 2016 21:44:57 -0400 Subject: GRO: Add support for TCP with fixed IPv4 ID field, limit tunnel IP ID values This patch does two things. First it allows TCP to aggregate TCP frames with a fixed IPv4 ID field. As a result we should now be able to aggregate flows that were converted from IPv6 to IPv4. In addition this allows us more flexibility for future implementations of segmentation as we may be able to use a fixed IP ID when segmenting the flow. The second thing this does is that it places limitations on the outer IPv4 ID header in the case of tunneled frames. Specifically it forces the IP ID to be incrementing by 1 unless the DF bit is set in the outer IPv4 header. This way we can avoid creating overlapping series of IP IDs that could possibly be fragmented if the frame goes through GRO and is then resegmented via GSO. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 1 + net/ipv4/af_inet.c | 35 ++++++++++++++++++++++++++++------- net/ipv4/tcp_offload.c | 16 +++++++++++++++- net/ipv6/ip6_offload.c | 8 ++++++-- 5 files changed, 54 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8e372d01b3c1..2d70c521d516 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2121,7 +2121,10 @@ struct napi_gro_cb { /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; - /* 6 bit hole */ + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* 5 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/net/core/dev.c b/net/core/dev.c index e896b1953ab6..b78b586b1856 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4462,6 +4462,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->is_fou = 0; + NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5bbea9a0ce96..8564cab96189 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1328,6 +1328,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, for (p = *head; p; p = p->next) { struct iphdr *iph2; + u16 flush_id; if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -1351,16 +1352,36 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, (iph->tos ^ iph2->tos) | ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); - /* Save the IP ID check to be included later when we get to - * the transport layer so only the inner most IP ID is checked. - * This is because some GSO/TSO implementations do not - * correctly increment the IP ID for the outer hdrs. - */ - NAPI_GRO_CB(p)->flush_id = - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; + + /* We need to store of the IP ID check to be included later + * when we can verify that this packet does in fact belong + * to a given flow. + */ + flush_id = (u16)(id - ntohs(iph2->id)); + + /* This bit of code makes it much easier for us to identify + * the cases where we are doing atomic vs non-atomic IP ID + * checks. Specifically an atomic check can return IP ID + * values 0 - 0xFFFF, while a non-atomic check can only + * return 0 or 0xFFFF. + */ + if (!NAPI_GRO_CB(p)->is_atomic || + !(iph->frag_off & htons(IP_DF))) { + flush_id ^= NAPI_GRO_CB(p)->count; + flush_id = flush_id ? 0xFFFF : 0; + } + + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = flush_id; + else + NAPI_GRO_CB(p)->flush_id |= flush_id; } + NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF)); NAPI_GRO_CB(skb)->flush |= flush; skb_set_network_header(skb, off); /* The above will be needed by the transport layer if there is one diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 08dd25d835af..d1ffd55289bd 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -239,7 +239,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: /* Include the IP ID check below from the inner most IP hdr */ - flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; + flush = NAPI_GRO_CB(p)->flush; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -248,6 +248,17 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); + /* When we receive our second frame we can made a decision on if we + * continue this flow as an atomic flow with a fixed ID or if we use + * an incrementing ID. + */ + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + mss = skb_shinfo(p)->gso_size; flush |= (len - 1) >= mss; @@ -316,6 +327,9 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (NAPI_GRO_CB(skb)->is_atomic) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + return tcp_gro_complete(skb); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b3a779393d71..061adcda65f3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -240,10 +240,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; - /* Clear flush_id, there's really no concept of ID in IPv6. */ - NAPI_GRO_CB(p)->flush_id = 0; + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = 0; } + NAPI_GRO_CB(skb)->is_atomic = true; NAPI_GRO_CB(skb)->flush |= flush; skb_gro_postpull_rcsum(skb, iph, nlen); -- cgit v1.2.3 From 802ab55adc39a06940a1b384e9fd0387fc762d7e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sun, 10 Apr 2016 21:45:03 -0400 Subject: GSO: Support partial segmentation offload This patch adds support for something I am referring to as GSO partial. The basic idea is that we can support a broader range of devices for segmentation if we use fixed outer headers and have the hardware only really deal with segmenting the inner header. The idea behind the naming is due to the fact that everything before csum_start will be fixed headers, and everything after will be the region that is handled by hardware. With the current implementation it allows us to add support for the following GSO types with an inner TSO_MANGLEID or TSO6 offload: NETIF_F_GSO_GRE NETIF_F_GSO_GRE_CSUM NETIF_F_GSO_IPIP NETIF_F_GSO_SIT NETIF_F_UDP_TUNNEL NETIF_F_UDP_TUNNEL_CSUM In the case of hardware that already supports tunneling we may be able to extend this further to support TSO_TCPV4 without TSO_MANGLEID if the hardware can support updating inner IPv4 headers. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 5 +++++ include/linux/netdevice.h | 2 ++ include/linux/skbuff.h | 9 +++++++-- net/core/dev.c | 36 +++++++++++++++++++++++++++++++++--- net/core/ethtool.c | 1 + net/core/skbuff.c | 29 ++++++++++++++++++++++++++++- net/ipv4/af_inet.c | 20 ++++++++++++++++---- net/ipv4/gre_offload.c | 26 +++++++++++++++++++++----- net/ipv4/tcp_offload.c | 10 ++++++++-- net/ipv4/udp_offload.c | 27 +++++++++++++++++++++------ net/ipv6/ip6_offload.c | 10 +++++++++- 11 files changed, 151 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7cf272a4b5c8..9fc79df0e561 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -48,6 +48,10 @@ enum { NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ + NETIF_F_GSO_PARTIAL_BIT, /* ... Only segment inner-most L4 + * in hardware and all other + * headers in software. + */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, @@ -122,6 +126,7 @@ enum { #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) +#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d70c521d516..a3bb534576a3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1654,6 +1654,7 @@ struct net_device { netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; + netdev_features_t gso_partial_features; int ifindex; int group; @@ -4004,6 +4005,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5fba16658f9d..da0ace389fec 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -483,7 +483,9 @@ enum { SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 13, + SKB_GSO_PARTIAL = 1 << 13, + + SKB_GSO_TUNNEL_REMCSUM = 1 << 14, }; #if BITS_PER_LONG > 32 @@ -3591,7 +3593,10 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) * Keeps track of level of encapsulation of network headers. */ struct skb_gso_cb { - int mac_offset; + union { + int mac_offset; + int data_offset; + }; int encap_level; __wsum csum; __u16 csum_start; diff --git a/net/core/dev.c b/net/core/dev.c index b78b586b1856..556dd09af3b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2711,6 +2711,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); @@ -2834,8 +2847,17 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > dev->gso_max_segs) return features & ~NETIF_F_GSO_MASK; - /* Make sure to clear the IPv4 ID mangling feature if - * the IPv4 header has the potential to be fragmented. + /* Support for GSO partial features requires software + * intervention before we can actually process the packets + * so we need to strip support for any partial features now + * and we can pull them back in after we have partially + * segmented the frame. + */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)) + features &= ~dev->gso_partial_features; + + /* Make sure to clear the IPv4 ID mangling feature if the + * IPv4 header has the potential to be fragmented. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { struct iphdr *iph = skb->encapsulation ? @@ -6729,6 +6751,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + /* GSO partial features require GSO partial be set */ + if ((features & dev->gso_partial_features) && + !(features & NETIF_F_GSO_PARTIAL)) { + netdev_dbg(dev, + "Dropping partially supported GSO features since no GSO partial.\n"); + features &= ~dev->gso_partial_features; + } + #ifdef CONFIG_NET_RX_BUSY_POLL if (dev->netdev_ops->ndo_busy_poll) features |= NETIF_F_BUSY_POLL; @@ -7011,7 +7041,7 @@ int register_netdevice(struct net_device *dev) /* Make NETIF_F_SG inheritable to tunnel devices. */ - dev->hw_enc_features |= NETIF_F_SG; + dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL; /* Make NETIF_F_SG inheritable to MPLS. */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9494c41cc77c..e0cf20a3b3dd 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -88,6 +88,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d04c2d1c8c87..4cc594cdaada 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,8 +3076,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); + unsigned int partial_segs = 0; unsigned int headroom; - unsigned int len; + unsigned int len = head_skb->len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); @@ -3094,6 +3095,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, csum = !!can_checksum_protocol(features, proto); + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ + if (features & NETIF_F_GSO_PARTIAL) { + partial_segs = len / mss; + mss *= partial_segs; + } + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3281,6 +3291,23 @@ perform_csum_check: */ segs->prev = tail; + /* Update GSO info on first skb in partial sequence. */ + if (partial_segs) { + int type = skb_shinfo(head_skb)->gso_type; + + /* Update type to add partial and then remove dodgy if set */ + type |= SKB_GSO_PARTIAL; + type &= ~SKB_GSO_DODGY; + + /* Update GSO info and prepare to start updating headers on + * our way back down the stack of protocols. + */ + skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; + skb_shinfo(segs)->gso_segs = partial_segs; + skb_shinfo(segs)->gso_type = type; + SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + } + /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8564cab96189..2e6e65fc4d20 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1200,7 +1200,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, const struct net_offload *ops; unsigned int offset = 0; struct iphdr *iph; - int proto; + int proto, tot_len; int nhoff; int ihl; int id; @@ -1219,6 +1219,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TCP_FIXEDID | SKB_GSO_TUNNEL_REMCSUM | + SKB_GSO_PARTIAL | 0))) goto out; @@ -1273,10 +1274,21 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; - } else if (!fixedid) { - iph->id = htons(id++); + tot_len = skb->len - nhoff; + } else if (skb_is_gso(skb)) { + if (!fixedid) { + iph->id = htons(id); + id += skb_shinfo(skb)->gso_segs; + } + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; + } else { + if (!fixedid) + iph->id = htons(id++); + tot_len = skb->len - nhoff; } - iph->tot_len = htons(skb->len - nhoff); + iph->tot_len = htons(tot_len); ip_send_check(iph); if (encap) skb_reset_inner_headers(skb); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6376b0cdf693..20557f211408 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT))) + SKB_GSO_SIT | + SKB_GSO_PARTIAL))) goto out; if (!skb->encapsulation) @@ -87,7 +88,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb = segs; do { struct gre_base_hdr *greh; - __be32 *pcsum; + __sum16 *pcsum; /* Set up inner headers if we are offloading inner checksum */ if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -107,10 +108,25 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, continue; greh = (struct gre_base_hdr *)skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); + pcsum = (__sum16 *)(greh + 1); + + if (skb_is_gso(skb)) { + unsigned int partial_adj; + + /* Adjust checksum to account for the fact that + * the partial checksum is based on actual size + * whereas headers should be based on MSS size. + */ + partial_adj = skb->len + skb_headroom(skb) - + SKB_GSO_CB(skb)->data_offset - + skb_shinfo(skb)->gso_size; + *pcsum = ~csum_fold((__force __wsum)htonl(partial_adj)); + } else { + *pcsum = 0; + } - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); + *(pcsum + 1) = 0; + *pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index d1ffd55289bd..02737b607aa7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -109,6 +109,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } + /* GSO partial only requires splitting the frame into an MSS + * multiple and possibly a remainder. So update the mss now. + */ + if (features & NETIF_F_GSO_PARTIAL) + mss = skb->len - (skb->len % mss); + copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -133,7 +139,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - do { + while (skb->next) { th->fin = th->psh = 0; th->check = newcheck; @@ -153,7 +159,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->seq = htonl(seq); th->cwr = 0; - } while (skb->next); + } /* Following permits TCP Small Queues to work well with GSO : * The callback to TCP stack will be called at the time last frag diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6230cf4b0d2d..097060def7f0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -39,8 +39,11 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * 16 bit length field due to the header being added outside of an * IP or IPv6 frame that was already limited to 64K - 1. */ - partial = csum_sub(csum_unfold(uh->check), - (__force __wsum)htonl(skb->len)); + if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) + partial = (__force __wsum)uh->len; + else + partial = (__force __wsum)htonl(skb->len); + partial = csum_sub(csum_unfold(uh->check), partial); /* setup inner skb. */ skb->encapsulation = 0; @@ -89,7 +92,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, udp_offset = outer_hlen - tnl_hlen; skb = segs; do { - __be16 len; + unsigned int len; if (remcsum) skb->ip_summed = CHECKSUM_NONE; @@ -107,14 +110,26 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); - len = htons(skb->len - udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = len; + + /* If we are only performing partial GSO the inner header + * will be using a length value equal to only one MSS sized + * segment instead of the entire frame. + */ + if (skb_is_gso(skb)) { + uh->len = htons(skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)uh); + } else { + uh->len = htons(len); + } if (!need_csum) continue; - uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len)); + uh->check = ~csum_fold(csum_add(partial, + (__force __wsum)htonl(len))); if (skb->encapsulation || !offload_csum) { uh->check = gso_make_checksum(skb, ~uh->check); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 061adcda65f3..f5eb184e1093 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,6 +63,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int proto; struct frag_hdr *fptr; unsigned int unfrag_ip6hlen; + unsigned int payload_len; u8 *prevhdr; int offset = 0; bool encap, udpfrag; @@ -82,6 +83,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | + SKB_GSO_PARTIAL | 0))) goto out; @@ -118,7 +120,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); + if (skb_is_gso(skb)) + payload_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)(ipv6h + 1); + else + payload_len = skb->len - nhoff - sizeof(*ipv6h); + ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { -- cgit v1.2.3 From 435faee1aae9c1ac231f89e4faf0437bfe29f425 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 Apr 2016 00:10:51 +0200 Subject: bpf, verifier: add ARG_PTR_TO_RAW_STACK type When passing buffers from eBPF stack space into a helper function, we have ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure that such buffers are initialized, within boundaries, etc. However, the downside with this is that we have a couple of helper functions such as bpf_skb_load_bytes() that fill out the passed buffer in the expected success case anyway, so zero initializing them prior to the helper call is unneeded/wasted instructions in the eBPF program that can be avoided. Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK. The idea is to skip the STACK_MISC check in check_stack_boundary() and color the related stack slots as STACK_MISC after we checked all call arguments. Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of the helper function will fill the provided buffer area, so that we cannot leak any uninitialized stack memory. This f.e. means that error paths need to memset() the buffers, but the expected fast-path doesn't have to do this anymore. Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK argument, we can keep it simple and don't need to check for multiple areas. Should in future such a use-case really appear, we have check_raw_mode() that will make sure we implement support for it first. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 +++++ kernel/bpf/verifier.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 59 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b2365a6eba3d..5fb3c610fa96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -66,6 +66,11 @@ enum bpf_arg_type { * functions that access data on eBPF program stack */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ + ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not + * need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 202f8f738542..9c843a5417da 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -207,6 +207,9 @@ struct verifier_env { struct bpf_call_arg_meta { struct bpf_map *map_ptr; + bool raw_mode; + int regno; + int access_size; }; /* verbose verifier prints what it's seeing @@ -789,7 +792,8 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * and all elements of stack are initialized */ static int check_stack_boundary(struct verifier_env *env, int regno, - int access_size, bool zero_size_allowed) + int access_size, bool zero_size_allowed, + struct bpf_call_arg_meta *meta) { struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs; @@ -815,6 +819,12 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return -EACCES; } + if (meta && meta->raw_mode) { + meta->access_size = access_size; + meta->regno = regno; + return 0; + } + for (i = 0; i < access_size; i++) { if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { verbose("invalid indirect read from stack off %d+%d size %d\n", @@ -859,7 +869,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, expected_type = CONST_PTR_TO_MAP; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; - } else if (arg_type == ARG_PTR_TO_STACK) { + } else if (arg_type == ARG_PTR_TO_STACK || + arg_type == ARG_PTR_TO_RAW_STACK) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be * passed in as argument, it's a CONST_IMM type. Final test @@ -867,6 +878,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, */ if (reg->type == CONST_IMM && reg->imm == 0) expected_type = CONST_IMM; + meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; @@ -896,7 +908,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } err = check_stack_boundary(env, regno, meta->map_ptr->key_size, - false); + false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -907,7 +919,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } err = check_stack_boundary(env, regno, - meta->map_ptr->value_size, false); + meta->map_ptr->value_size, + false, NULL); } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); @@ -922,7 +935,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } err = check_stack_boundary(env, regno - 1, reg->imm, - zero_size_allowed); + zero_size_allowed, meta); } return err; @@ -953,6 +966,24 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) return 0; } +static int check_raw_mode(const struct bpf_func_proto *fn) +{ + int count = 0; + + if (fn->arg1_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg2_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg3_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg4_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg5_type == ARG_PTR_TO_RAW_STACK) + count++; + + return count > 1 ? -EINVAL : 0; +} + static int check_call(struct verifier_env *env, int func_id) { struct verifier_state *state = &env->cur_state; @@ -984,6 +1015,15 @@ static int check_call(struct verifier_env *env, int func_id) memset(&meta, 0, sizeof(meta)); + /* We only support one arg being in raw mode at the moment, which + * is sufficient for the helper functions we have right now. + */ + err = check_raw_mode(fn); + if (err) { + verbose("kernel subsystem misconfigured func %d\n", func_id); + return err; + } + /* check args */ err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); if (err) @@ -1001,6 +1041,15 @@ static int check_call(struct verifier_env *env, int func_id) if (err) return err; + /* Mark slots with STACK_MISC in case of raw mode, stack offset + * is inferred from register state. + */ + for (i = 0; i < meta.access_size; i++) { + err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1); + if (err) + return err; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { reg = regs + caller_saved[i]; -- cgit v1.2.3 From 58bc67fc32b1c67fb045f4828a67134dc8fee631 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 10 Jul 2015 15:23:28 +0300 Subject: ARM: OMAP2+: gpmc: Add platform data Add a platform data structure for GPMC. It contains all the necessary platform information that needs to be passed from platform init code to GPMC driver. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- include/linux/omap-gpmc.h | 3 +-- include/linux/platform_data/gpmc-omap.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 include/linux/platform_data/gpmc-omap.h (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index d833eb4dd446..45d9075be1e5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -7,8 +7,7 @@ * option) any later version. */ -/* Maximum Number of Chip Selects */ -#define GPMC_CS_NUM 8 +#include #define GPMC_CONFIG_WP 0x00000005 diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h new file mode 100644 index 000000000000..6804a8b387d7 --- /dev/null +++ b/include/linux/platform_data/gpmc-omap.h @@ -0,0 +1,30 @@ +/* + * OMAP GPMC Platform data + * + * Copyright (C) 2014 Texas Instruments, Inc. - http://www.ti.com + * Roger Quadros + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#ifndef _GPMC_OMAP_H_ +#define _GPMC_OMAP_H_ + +/* Maximum Number of Chip Selects */ +#define GPMC_CS_NUM 8 + +/* Data for each chip select */ +struct gpmc_omap_cs_data { + bool valid; /* data is valid */ + bool is_nand; /* device within this CS is NAND */ + struct platform_device *pdev; /* device within this CS region */ + unsigned int pdata_size; +}; + +struct gpmc_omap_platform_data { + struct gpmc_omap_cs_data cs[GPMC_CS_NUM]; +}; + +#endif /* _GPMC_OMAP_H */ -- cgit v1.2.3 From fabe7d7756d17f5da4bd80fa2373c4bd93ed39e5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 10 Jul 2015 15:23:29 +0300 Subject: ARM: OMAP2+: gpmc: Add gpmc timings and settings to platform data Add device_timings, gpmc_timings and gpmc_setting to gpmc platform data. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- include/linux/omap-gpmc.h | 139 ------------------------------- include/linux/platform_data/gpmc-omap.h | 142 ++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 139 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 45d9075be1e5..2dcef1c8c8d4 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -14,145 +14,6 @@ #define GPMC_IRQ_FIFOEVENTENABLE 0x01 #define GPMC_IRQ_COUNT_EVENT 0x02 -#define GPMC_BURST_4 4 /* 4 word burst */ -#define GPMC_BURST_8 8 /* 8 word burst */ -#define GPMC_BURST_16 16 /* 16 word burst */ -#define GPMC_DEVWIDTH_8BIT 1 /* 8-bit device width */ -#define GPMC_DEVWIDTH_16BIT 2 /* 16-bit device width */ -#define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */ -#define GPMC_MUX_AD 2 /* Addr-Data multiplex */ - -/* bool type time settings */ -struct gpmc_bool_timings { - bool cycle2cyclediffcsen; - bool cycle2cyclesamecsen; - bool we_extra_delay; - bool oe_extra_delay; - bool adv_extra_delay; - bool cs_extra_delay; - bool time_para_granularity; -}; - -/* - * Note that all values in this struct are in nanoseconds except sync_clk - * (which is in picoseconds), while the register values are in gpmc_fck cycles. - */ -struct gpmc_timings { - /* Minimum clock period for synchronous mode (in picoseconds) */ - u32 sync_clk; - - /* Chip-select signal timings corresponding to GPMC_CS_CONFIG2 */ - u32 cs_on; /* Assertion time */ - u32 cs_rd_off; /* Read deassertion time */ - u32 cs_wr_off; /* Write deassertion time */ - - /* ADV signal timings corresponding to GPMC_CONFIG3 */ - u32 adv_on; /* Assertion time */ - u32 adv_rd_off; /* Read deassertion time */ - u32 adv_wr_off; /* Write deassertion time */ - u32 adv_aad_mux_on; /* ADV assertion time for AAD */ - u32 adv_aad_mux_rd_off; /* ADV read deassertion time for AAD */ - u32 adv_aad_mux_wr_off; /* ADV write deassertion time for AAD */ - - /* WE signals timings corresponding to GPMC_CONFIG4 */ - u32 we_on; /* WE assertion time */ - u32 we_off; /* WE deassertion time */ - - /* OE signals timings corresponding to GPMC_CONFIG4 */ - u32 oe_on; /* OE assertion time */ - u32 oe_off; /* OE deassertion time */ - u32 oe_aad_mux_on; /* OE assertion time for AAD */ - u32 oe_aad_mux_off; /* OE deassertion time for AAD */ - - /* Access time and cycle time timings corresponding to GPMC_CONFIG5 */ - u32 page_burst_access; /* Multiple access word delay */ - u32 access; /* Start-cycle to first data valid delay */ - u32 rd_cycle; /* Total read cycle time */ - u32 wr_cycle; /* Total write cycle time */ - - u32 bus_turnaround; - u32 cycle2cycle_delay; - - u32 wait_monitoring; - u32 clk_activation; - - /* The following are only on OMAP3430 */ - u32 wr_access; /* WRACCESSTIME */ - u32 wr_data_mux_bus; /* WRDATAONADMUXBUS */ - - struct gpmc_bool_timings bool_timings; -}; - -/* Device timings in picoseconds */ -struct gpmc_device_timings { - u32 t_ceasu; /* address setup to CS valid */ - u32 t_avdasu; /* address setup to ADV valid */ - /* XXX: try to combine t_avdp_r & t_avdp_w. Issue is - * of tusb using these timings even for sync whilst - * ideally for adv_rd/(wr)_off it should have considered - * t_avdh instead. This indirectly necessitates r/w - * variations of t_avdp as it is possible to have one - * sync & other async - */ - u32 t_avdp_r; /* ADV low time (what about t_cer ?) */ - u32 t_avdp_w; - u32 t_aavdh; /* address hold time */ - u32 t_oeasu; /* address setup to OE valid */ - u32 t_aa; /* access time from ADV assertion */ - u32 t_iaa; /* initial access time */ - u32 t_oe; /* access time from OE assertion */ - u32 t_ce; /* access time from CS asertion */ - u32 t_rd_cycle; /* read cycle time */ - u32 t_cez_r; /* read CS deassertion to high Z */ - u32 t_cez_w; /* write CS deassertion to high Z */ - u32 t_oez; /* OE deassertion to high Z */ - u32 t_weasu; /* address setup to WE valid */ - u32 t_wpl; /* write assertion time */ - u32 t_wph; /* write deassertion time */ - u32 t_wr_cycle; /* write cycle time */ - - u32 clk; - u32 t_bacc; /* burst access valid clock to output delay */ - u32 t_ces; /* CS setup time to clk */ - u32 t_avds; /* ADV setup time to clk */ - u32 t_avdh; /* ADV hold time from clk */ - u32 t_ach; /* address hold time from clk */ - u32 t_rdyo; /* clk to ready valid */ - - u32 t_ce_rdyz; /* XXX: description ?, or use t_cez instead */ - u32 t_ce_avd; /* CS on to ADV on delay */ - - /* XXX: check the possibility of combining - * cyc_aavhd_oe & cyc_aavdh_we - */ - u8 cyc_aavdh_oe;/* read address hold time in cycles */ - u8 cyc_aavdh_we;/* write address hold time in cycles */ - u8 cyc_oe; /* access time from OE assertion in cycles */ - u8 cyc_wpl; /* write deassertion time in cycles */ - u32 cyc_iaa; /* initial access time in cycles */ - - /* extra delays */ - bool ce_xdelay; - bool avd_xdelay; - bool oe_xdelay; - bool we_xdelay; -}; - -struct gpmc_settings { - bool burst_wrap; /* enables wrap bursting */ - bool burst_read; /* enables read page/burst mode */ - bool burst_write; /* enables write page/burst mode */ - bool device_nand; /* device is NAND */ - bool sync_read; /* enables synchronous reads */ - bool sync_write; /* enables synchronous writes */ - bool wait_on_read; /* monitor wait on reads */ - bool wait_on_write; /* monitor wait on writes */ - u32 burst_len; /* page/burst length */ - u32 device_width; /* device bus width (8 or 16 bit) */ - u32 mux_add_data; /* multiplex address & data */ - u32 wait_pin; /* wait-pin to be used */ -}; - extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct gpmc_settings *gpmc_s, struct gpmc_device_timings *dev_t); diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index 6804a8b387d7..67ccdb0e1606 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -15,10 +15,152 @@ /* Maximum Number of Chip Selects */ #define GPMC_CS_NUM 8 +/* bool type time settings */ +struct gpmc_bool_timings { + bool cycle2cyclediffcsen; + bool cycle2cyclesamecsen; + bool we_extra_delay; + bool oe_extra_delay; + bool adv_extra_delay; + bool cs_extra_delay; + bool time_para_granularity; +}; + +/* + * Note that all values in this struct are in nanoseconds except sync_clk + * (which is in picoseconds), while the register values are in gpmc_fck cycles. + */ +struct gpmc_timings { + /* Minimum clock period for synchronous mode (in picoseconds) */ + u32 sync_clk; + + /* Chip-select signal timings corresponding to GPMC_CS_CONFIG2 */ + u32 cs_on; /* Assertion time */ + u32 cs_rd_off; /* Read deassertion time */ + u32 cs_wr_off; /* Write deassertion time */ + + /* ADV signal timings corresponding to GPMC_CONFIG3 */ + u32 adv_on; /* Assertion time */ + u32 adv_rd_off; /* Read deassertion time */ + u32 adv_wr_off; /* Write deassertion time */ + u32 adv_aad_mux_on; /* ADV assertion time for AAD */ + u32 adv_aad_mux_rd_off; /* ADV read deassertion time for AAD */ + u32 adv_aad_mux_wr_off; /* ADV write deassertion time for AAD */ + + /* WE signals timings corresponding to GPMC_CONFIG4 */ + u32 we_on; /* WE assertion time */ + u32 we_off; /* WE deassertion time */ + + /* OE signals timings corresponding to GPMC_CONFIG4 */ + u32 oe_on; /* OE assertion time */ + u32 oe_off; /* OE deassertion time */ + u32 oe_aad_mux_on; /* OE assertion time for AAD */ + u32 oe_aad_mux_off; /* OE deassertion time for AAD */ + + /* Access time and cycle time timings corresponding to GPMC_CONFIG5 */ + u32 page_burst_access; /* Multiple access word delay */ + u32 access; /* Start-cycle to first data valid delay */ + u32 rd_cycle; /* Total read cycle time */ + u32 wr_cycle; /* Total write cycle time */ + + u32 bus_turnaround; + u32 cycle2cycle_delay; + + u32 wait_monitoring; + u32 clk_activation; + + /* The following are only on OMAP3430 */ + u32 wr_access; /* WRACCESSTIME */ + u32 wr_data_mux_bus; /* WRDATAONADMUXBUS */ + + struct gpmc_bool_timings bool_timings; +}; + +/* Device timings in picoseconds */ +struct gpmc_device_timings { + u32 t_ceasu; /* address setup to CS valid */ + u32 t_avdasu; /* address setup to ADV valid */ + /* XXX: try to combine t_avdp_r & t_avdp_w. Issue is + * of tusb using these timings even for sync whilst + * ideally for adv_rd/(wr)_off it should have considered + * t_avdh instead. This indirectly necessitates r/w + * variations of t_avdp as it is possible to have one + * sync & other async + */ + u32 t_avdp_r; /* ADV low time (what about t_cer ?) */ + u32 t_avdp_w; + u32 t_aavdh; /* address hold time */ + u32 t_oeasu; /* address setup to OE valid */ + u32 t_aa; /* access time from ADV assertion */ + u32 t_iaa; /* initial access time */ + u32 t_oe; /* access time from OE assertion */ + u32 t_ce; /* access time from CS asertion */ + u32 t_rd_cycle; /* read cycle time */ + u32 t_cez_r; /* read CS deassertion to high Z */ + u32 t_cez_w; /* write CS deassertion to high Z */ + u32 t_oez; /* OE deassertion to high Z */ + u32 t_weasu; /* address setup to WE valid */ + u32 t_wpl; /* write assertion time */ + u32 t_wph; /* write deassertion time */ + u32 t_wr_cycle; /* write cycle time */ + + u32 clk; + u32 t_bacc; /* burst access valid clock to output delay */ + u32 t_ces; /* CS setup time to clk */ + u32 t_avds; /* ADV setup time to clk */ + u32 t_avdh; /* ADV hold time from clk */ + u32 t_ach; /* address hold time from clk */ + u32 t_rdyo; /* clk to ready valid */ + + u32 t_ce_rdyz; /* XXX: description ?, or use t_cez instead */ + u32 t_ce_avd; /* CS on to ADV on delay */ + + /* XXX: check the possibility of combining + * cyc_aavhd_oe & cyc_aavdh_we + */ + u8 cyc_aavdh_oe;/* read address hold time in cycles */ + u8 cyc_aavdh_we;/* write address hold time in cycles */ + u8 cyc_oe; /* access time from OE assertion in cycles */ + u8 cyc_wpl; /* write deassertion time in cycles */ + u32 cyc_iaa; /* initial access time in cycles */ + + /* extra delays */ + bool ce_xdelay; + bool avd_xdelay; + bool oe_xdelay; + bool we_xdelay; +}; + +#define GPMC_BURST_4 4 /* 4 word burst */ +#define GPMC_BURST_8 8 /* 8 word burst */ +#define GPMC_BURST_16 16 /* 16 word burst */ +#define GPMC_DEVWIDTH_8BIT 1 /* 8-bit device width */ +#define GPMC_DEVWIDTH_16BIT 2 /* 16-bit device width */ +#define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */ +#define GPMC_MUX_AD 2 /* Addr-Data multiplex */ + +struct gpmc_settings { + bool burst_wrap; /* enables wrap bursting */ + bool burst_read; /* enables read page/burst mode */ + bool burst_write; /* enables write page/burst mode */ + bool device_nand; /* device is NAND */ + bool sync_read; /* enables synchronous reads */ + bool sync_write; /* enables synchronous writes */ + bool wait_on_read; /* monitor wait on reads */ + bool wait_on_write; /* monitor wait on writes */ + u32 burst_len; /* page/burst length */ + u32 device_width; /* device bus width (8 or 16 bit) */ + u32 mux_add_data; /* multiplex address & data */ + u32 wait_pin; /* wait-pin to be used */ +}; + /* Data for each chip select */ struct gpmc_omap_cs_data { bool valid; /* data is valid */ bool is_nand; /* device within this CS is NAND */ + struct gpmc_settings *settings; + struct gpmc_device_timings *device_timings; + struct gpmc_timings *gpmc_timings; struct platform_device *pdev; /* device within this CS region */ unsigned int pdata_size; }; -- cgit v1.2.3 From f47fcad63f6847ea677c6c7030f30fd6438e0052 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 5 Aug 2015 13:58:01 +0300 Subject: memory: omap-gpmc: Introduce GPMC to NAND interface The OMAP GPMC module has certain registers dedicated for NAND access and some NAND bits mixed with other GPMC functionality. For the NAND dedicated registers we have the struct gpmc_nand_regs. The NAND driver needs to access NAND specific bits from the following non-dedicated registers - EMPTYWRITEBUFFERSTATUS from GPMC_STATUS For accessing these bits we introduce the struct gpmc_nand_ops. Add gpmc_omap_get_nand_ops() that returns the gpmc_nand_ops along with updating the gpmc_nand_regs. This API will be called by the OMAP NAND driver to access the necessary bits in GPMC register space. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- drivers/memory/omap-gpmc.c | 21 +++++++++++++++++++++ include/linux/omap-gpmc.h | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 21825ddce4a3..0b62afd86f7e 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1118,6 +1118,27 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) } } +static struct gpmc_nand_ops nand_ops; + +/** + * gpmc_omap_get_nand_ops - Get the GPMC NAND interface + * @regs: the GPMC NAND register map exclusive for NAND use. + * @cs: GPMC chip select number on which the NAND sits. The + * register map returned will be specific to this chip select. + * + * Returns NULL on error e.g. invalid cs. + */ +struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *reg, int cs) +{ + if (cs >= gpmc_cs_num) + return NULL; + + gpmc_update_nand_reg(reg, cs); + + return &nand_ops; +} +EXPORT_SYMBOL_GPL(gpmc_omap_get_nand_ops); + int gpmc_get_client_irq(unsigned irq_config) { int i; diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 2dcef1c8c8d4..dc2ada6fb9b4 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -14,14 +14,45 @@ #define GPMC_IRQ_FIFOEVENTENABLE 0x01 #define GPMC_IRQ_COUNT_EVENT 0x02 +/** + * gpmc_nand_ops - Interface between NAND and GPMC + * @nand_write_buffer_empty: get the NAND write buffer empty status. + */ +struct gpmc_nand_ops { + bool (*nand_writebuffer_empty)(void); +}; + +struct gpmc_nand_regs; + +#if IS_ENABLED(CONFIG_OMAP_GPMC) +struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, + int cs); +#else +static inline gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, + int cs) +{ + return NULL; +} +#endif /* CONFIG_OMAP_GPMC */ + +/*--------------------------------*/ + +/* deprecated APIs */ +#if IS_ENABLED(CONFIG_OMAP_GPMC) +void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs); +#else +static inline void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) +{ +} +#endif /* CONFIG_OMAP_GPMC */ +/*--------------------------------*/ + extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct gpmc_settings *gpmc_s, struct gpmc_device_timings *dev_t); -struct gpmc_nand_regs; struct device_node; -extern void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs); extern int gpmc_get_client_irq(unsigned irq_config); extern unsigned int gpmc_ticks_to_ns(unsigned int ticks); -- cgit v1.2.3 From 384258f252727c67772bbd48dad3185a30ba50d3 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 30 Jul 2015 14:49:23 +0300 Subject: memory: omap-gpmc: Implement IRQ domain for NAND IRQs GPMC provides 2 interrupts for NAND use. i.e. fifoevent and termcount. Use IRQ domain for this. NAND device tree node can then get the necessary interrupts by using gpmc as the interrupt parent. Legacy boot uses gpmc_get_client_irq to get the NAND interrupts from the GPMC IRQ domain. Get rid of custom bitmasks and use IRQ domain for that as well. Signed-off-by: Roger Quadros Acked-by: Rob Herring Acked-by: Tony Lindgren --- Documentation/devicetree/bindings/bus/ti-gpmc.txt | 8 + drivers/memory/omap-gpmc.c | 246 ++++++++++++---------- include/linux/omap-gpmc.h | 5 +- 3 files changed, 144 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/bus/ti-gpmc.txt b/Documentation/devicetree/bindings/bus/ti-gpmc.txt index 01683707060b..13f13786f992 100644 --- a/Documentation/devicetree/bindings/bus/ti-gpmc.txt +++ b/Documentation/devicetree/bindings/bus/ti-gpmc.txt @@ -32,6 +32,12 @@ Required properties: bootloader) are used for the physical address decoding. As this will change in the future, filling correct values here is a requirement. + - interrupt-controller: The GPMC driver implements and interrupt controller for + the NAND events "fifoevent" and "termcount". + The interrupt number mapping is as follows + 0 - NAND_fifoevent + 1 - NAND_termcount + - interrupt-cells: Must be set to 2 Timing properties for child nodes. All are optional and default to 0. @@ -130,6 +136,8 @@ Example for an AM33xx board: #address-cells = <2>; #size-cells = <1>; ranges = <0 0 0x08000000 0x10000000>; /* CS0 @addr 0x8000000, size 0x10000000 */ + interrupt-controller; + #interrupt-cells = <2>; /* child nodes go here */ }; diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 90dfba5a8f55..e28d6bc2500a 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -127,7 +128,6 @@ #define GPMC_CONFIG_RDY_BSY 0x00000001 #define GPMC_CONFIG_DEV_SIZE 0x00000002 #define GPMC_CONFIG_DEV_TYPE 0x00000003 -#define GPMC_SET_IRQ_STATUS 0x00000004 #define GPMC_CONFIG1_WRAPBURST_SUPP (1 << 31) #define GPMC_CONFIG1_READMULTIPLE_SUPP (1 << 30) @@ -176,8 +176,6 @@ #define GPMC_CONFIG_WRITEPROTECT 0x00000010 #define WR_RD_PIN_MONITORING 0x00600000 -#define GPMC_ENABLE_IRQ 0x0000000d - /* ECC commands */ #define GPMC_ECC_READ 0 /* Reset Hardware ECC for read */ #define GPMC_ECC_WRITE 1 /* Reset Hardware ECC for write */ @@ -201,11 +199,6 @@ struct gpmc_cs_data { struct resource mem; }; -struct gpmc_client_irq { - unsigned irq; - u32 bitmask; -}; - /* Structure to save gpmc cs context */ struct gpmc_cs_config { u32 config1; @@ -233,9 +226,13 @@ struct omap3_gpmc_regs { struct gpmc_cs_config cs_context[GPMC_CS_NUM]; }; -static struct gpmc_client_irq gpmc_client_irq[GPMC_NR_IRQ]; -static struct irq_chip gpmc_irq_chip; -static int gpmc_irq_start; +struct gpmc_device { + struct device *dev; + int irq; + struct irq_chip irq_chip; +}; + +static struct irq_domain *gpmc_irq_domain; static struct resource gpmc_mem_root; static struct gpmc_cs_data gpmc_cs[GPMC_CS_NUM]; @@ -243,8 +240,6 @@ static DEFINE_SPINLOCK(gpmc_mem_lock); /* Define chip-selects as reserved by default until probe completes */ static unsigned int gpmc_cs_num = GPMC_CS_NUM; static unsigned int gpmc_nr_waitpins; -static struct device *gpmc_dev; -static int gpmc_irq; static resource_size_t phys_base, mem_size; static unsigned gpmc_capability; static void __iomem *gpmc_base; @@ -1056,14 +1051,6 @@ int gpmc_configure(int cmd, int wval) u32 regval; switch (cmd) { - case GPMC_ENABLE_IRQ: - gpmc_write_reg(GPMC_IRQENABLE, wval); - break; - - case GPMC_SET_IRQ_STATUS: - gpmc_write_reg(GPMC_IRQSTATUS, wval); - break; - case GPMC_CONFIG_WP: regval = gpmc_read_reg(GPMC_CONFIG); if (wval) @@ -1153,85 +1140,97 @@ EXPORT_SYMBOL_GPL(gpmc_omap_get_nand_ops); int gpmc_get_client_irq(unsigned irq_config) { - int i; - - if (hweight32(irq_config) > 1) + if (!gpmc_irq_domain) { + pr_warn("%s called before GPMC IRQ domain available\n", + __func__); return 0; + } - for (i = 0; i < GPMC_NR_IRQ; i++) - if (gpmc_client_irq[i].bitmask & irq_config) - return gpmc_client_irq[i].irq; + if (irq_config >= GPMC_NR_IRQ) + return 0; - return 0; + return irq_create_mapping(gpmc_irq_domain, irq_config); } -static int gpmc_irq_endis(unsigned irq, bool endis) +static int gpmc_irq_endis(unsigned long hwirq, bool endis) { - int i; u32 regval; - for (i = 0; i < GPMC_NR_IRQ; i++) - if (irq == gpmc_client_irq[i].irq) { - regval = gpmc_read_reg(GPMC_IRQENABLE); - if (endis) - regval |= gpmc_client_irq[i].bitmask; - else - regval &= ~gpmc_client_irq[i].bitmask; - gpmc_write_reg(GPMC_IRQENABLE, regval); - break; - } + regval = gpmc_read_reg(GPMC_IRQENABLE); + if (endis) + regval |= BIT(hwirq); + else + regval &= ~BIT(hwirq); + gpmc_write_reg(GPMC_IRQENABLE, regval); return 0; } static void gpmc_irq_disable(struct irq_data *p) { - gpmc_irq_endis(p->irq, false); + gpmc_irq_endis(p->hwirq, false); } static void gpmc_irq_enable(struct irq_data *p) { - gpmc_irq_endis(p->irq, true); + gpmc_irq_endis(p->hwirq, true); } static void gpmc_irq_noop(struct irq_data *data) { } static unsigned int gpmc_irq_noop_ret(struct irq_data *data) { return 0; } -static int gpmc_setup_irq(void) +static int gpmc_irq_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hw) { - int i; + struct gpmc_device *gpmc = d->host_data; + + irq_set_chip_data(virq, gpmc); + irq_set_chip_and_handler(virq, &gpmc->irq_chip, handle_simple_irq); + irq_modify_status(virq, IRQ_NOREQUEST, IRQ_NOAUTOEN); + + return 0; +} + +static const struct irq_domain_ops gpmc_irq_domain_ops = { + .map = gpmc_irq_map, + .xlate = irq_domain_xlate_twocell, +}; + +static irqreturn_t gpmc_handle_irq(int irq, void *data) +{ + int hwirq, virq; u32 regval; + struct gpmc_device *gpmc = data; - if (!gpmc_irq) - return -EINVAL; + regval = gpmc_read_reg(GPMC_IRQSTATUS); - gpmc_irq_start = irq_alloc_descs(-1, 0, GPMC_NR_IRQ, 0); - if (gpmc_irq_start < 0) { - pr_err("irq_alloc_descs failed\n"); - return gpmc_irq_start; - } + if (!regval) + return IRQ_NONE; - gpmc_irq_chip.name = "gpmc"; - gpmc_irq_chip.irq_startup = gpmc_irq_noop_ret; - gpmc_irq_chip.irq_enable = gpmc_irq_enable; - gpmc_irq_chip.irq_disable = gpmc_irq_disable; - gpmc_irq_chip.irq_shutdown = gpmc_irq_noop; - gpmc_irq_chip.irq_ack = gpmc_irq_noop; - gpmc_irq_chip.irq_mask = gpmc_irq_noop; - gpmc_irq_chip.irq_unmask = gpmc_irq_noop; - - gpmc_client_irq[0].bitmask = GPMC_IRQ_FIFOEVENTENABLE; - gpmc_client_irq[1].bitmask = GPMC_IRQ_COUNT_EVENT; - - for (i = 0; i < GPMC_NR_IRQ; i++) { - gpmc_client_irq[i].irq = gpmc_irq_start + i; - irq_set_chip_and_handler(gpmc_client_irq[i].irq, - &gpmc_irq_chip, handle_simple_irq); - irq_modify_status(gpmc_client_irq[i].irq, IRQ_NOREQUEST, - IRQ_NOAUTOEN); + for (hwirq = 0; hwirq < GPMC_NR_IRQ; hwirq++) { + if (regval & BIT(hwirq)) { + virq = irq_find_mapping(gpmc_irq_domain, hwirq); + if (!virq) { + dev_warn(gpmc->dev, + "spurious irq detected hwirq %d, virq %d\n", + hwirq, virq); + } + + generic_handle_irq(virq); + } } + gpmc_write_reg(GPMC_IRQSTATUS, regval); + + return IRQ_HANDLED; +} + +static int gpmc_setup_irq(struct gpmc_device *gpmc) +{ + u32 regval; + int rc; + /* Disable interrupts */ gpmc_write_reg(GPMC_IRQENABLE, 0); @@ -1239,22 +1238,46 @@ static int gpmc_setup_irq(void) regval = gpmc_read_reg(GPMC_IRQSTATUS); gpmc_write_reg(GPMC_IRQSTATUS, regval); - return request_irq(gpmc_irq, gpmc_handle_irq, 0, "gpmc", NULL); + gpmc->irq_chip.name = "gpmc"; + gpmc->irq_chip.irq_startup = gpmc_irq_noop_ret; + gpmc->irq_chip.irq_enable = gpmc_irq_enable; + gpmc->irq_chip.irq_disable = gpmc_irq_disable; + gpmc->irq_chip.irq_shutdown = gpmc_irq_noop; + gpmc->irq_chip.irq_ack = gpmc_irq_noop; + gpmc->irq_chip.irq_mask = gpmc_irq_noop; + gpmc->irq_chip.irq_unmask = gpmc_irq_noop; + + gpmc_irq_domain = irq_domain_add_linear(gpmc->dev->of_node, + GPMC_NR_IRQ, + &gpmc_irq_domain_ops, + gpmc); + if (!gpmc_irq_domain) { + dev_err(gpmc->dev, "IRQ domain add failed\n"); + return -ENODEV; + } + + rc = request_irq(gpmc->irq, gpmc_handle_irq, 0, "gpmc", gpmc); + if (rc) { + dev_err(gpmc->dev, "failed to request irq %d: %d\n", + gpmc->irq, rc); + irq_domain_remove(gpmc_irq_domain); + gpmc_irq_domain = NULL; + } + + return rc; } -static int gpmc_free_irq(void) +static int gpmc_free_irq(struct gpmc_device *gpmc) { - int i; + int hwirq; - if (gpmc_irq) - free_irq(gpmc_irq, NULL); + free_irq(gpmc->irq, gpmc); - for (i = 0; i < GPMC_NR_IRQ; i++) { - irq_set_handler(gpmc_client_irq[i].irq, NULL); - irq_set_chip(gpmc_client_irq[i].irq, &no_irq_chip); - } + for (hwirq = 0; hwirq < GPMC_NR_IRQ; hwirq++) + irq_dispose_mapping(irq_find_mapping(gpmc_irq_domain, hwirq)); - irq_free_descs(gpmc_irq_start, GPMC_NR_IRQ); + irq_domain_remove(gpmc_irq_domain); + gpmc_irq_domain = NULL; return 0; } @@ -2154,6 +2177,14 @@ static int gpmc_probe(struct platform_device *pdev) int rc; u32 l; struct resource *res; + struct gpmc_device *gpmc; + + gpmc = devm_kzalloc(&pdev->dev, sizeof(*gpmc), GFP_KERNEL); + if (!gpmc) + return -ENOMEM; + + gpmc->dev = &pdev->dev; + platform_set_drvdata(pdev, gpmc); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) @@ -2167,15 +2198,16 @@ static int gpmc_probe(struct platform_device *pdev) return PTR_ERR(gpmc_base); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (res == NULL) - dev_warn(&pdev->dev, "Failed to get resource: irq\n"); - else - gpmc_irq = res->start; + if (!res) { + dev_err(&pdev->dev, "Failed to get resource: irq\n"); + return -ENOENT; + } + + gpmc->irq = res->start; gpmc_l3_clk = devm_clk_get(&pdev->dev, "fck"); if (IS_ERR(gpmc_l3_clk)) { dev_err(&pdev->dev, "Failed to get GPMC fck\n"); - gpmc_irq = 0; return PTR_ERR(gpmc_l3_clk); } @@ -2187,8 +2219,6 @@ static int gpmc_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); - gpmc_dev = &pdev->dev; - l = gpmc_read_reg(GPMC_REVISION); /* @@ -2207,13 +2237,16 @@ static int gpmc_probe(struct platform_device *pdev) gpmc_capability = GPMC_HAS_WR_ACCESS | GPMC_HAS_WR_DATA_MUX_BUS; if (GPMC_REVISION_MAJOR(l) > 0x5) gpmc_capability |= GPMC_HAS_MUX_AAD; - dev_info(gpmc_dev, "GPMC revision %d.%d\n", GPMC_REVISION_MAJOR(l), + dev_info(gpmc->dev, "GPMC revision %d.%d\n", GPMC_REVISION_MAJOR(l), GPMC_REVISION_MINOR(l)); gpmc_mem_init(); - if (gpmc_setup_irq() < 0) - dev_warn(gpmc_dev, "gpmc_setup_irq failed\n"); + rc = gpmc_setup_irq(gpmc); + if (rc) { + dev_err(gpmc->dev, "gpmc_setup_irq failed\n"); + goto fail; + } if (!pdev->dev.of_node) { gpmc_cs_num = GPMC_CS_NUM; @@ -2222,21 +2255,27 @@ static int gpmc_probe(struct platform_device *pdev) rc = gpmc_probe_dt(pdev); if (rc < 0) { - pm_runtime_put_sync(&pdev->dev); - dev_err(gpmc_dev, "failed to probe DT parameters\n"); - return rc; + dev_err(gpmc->dev, "failed to probe DT parameters\n"); + gpmc_free_irq(gpmc); + goto fail; } return 0; + +fail: + pm_runtime_put_sync(&pdev->dev); + return rc; } static int gpmc_remove(struct platform_device *pdev) { - gpmc_free_irq(); + struct gpmc_device *gpmc = platform_get_drvdata(pdev); + + gpmc_free_irq(gpmc); gpmc_mem_exit(); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - gpmc_dev = NULL; + return 0; } @@ -2282,25 +2321,6 @@ static __exit void gpmc_exit(void) postcore_initcall(gpmc_init); module_exit(gpmc_exit); -static irqreturn_t gpmc_handle_irq(int irq, void *dev) -{ - int i; - u32 regval; - - regval = gpmc_read_reg(GPMC_IRQSTATUS); - - if (!regval) - return IRQ_NONE; - - for (i = 0; i < GPMC_NR_IRQ; i++) - if (regval & gpmc_client_irq[i].bitmask) - generic_handle_irq(gpmc_client_irq[i].irq); - - gpmc_write_reg(GPMC_IRQSTATUS, regval); - - return IRQ_HANDLED; -} - static struct omap3_gpmc_regs gpmc_context; void omap3_gpmc_save_context(void) diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index dc2ada6fb9b4..9e9d79e8efa5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -11,8 +11,9 @@ #define GPMC_CONFIG_WP 0x00000005 -#define GPMC_IRQ_FIFOEVENTENABLE 0x01 -#define GPMC_IRQ_COUNT_EVENT 0x02 +/* IRQ numbers in GPMC IRQ domain for legacy boot use */ +#define GPMC_IRQ_FIFOEVENTENABLE 0 +#define GPMC_IRQ_COUNT_EVENT 1 /** * gpmc_nand_ops - Interface between NAND and GPMC -- cgit v1.2.3 From c509aefd75d026f4ef4aa306131d7a780c2eda7b Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 5 Aug 2015 14:01:50 +0300 Subject: mtd: nand: omap: Use gpmc_omap_get_nand_ops() to get NAND registers Deprecate nand register passing via platform data and use gpmc_omap_get_nand_ops() instead. Signed-off-by: Roger Quadros Acked-by: Brian Norris Acked-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc-nand.c | 2 -- drivers/mtd/nand/omap2.c | 9 ++++++++- include/linux/platform_data/mtd-nand-omap2.h | 4 +++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 72918c4973ea..04e6998c1529 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -121,8 +121,6 @@ int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data, if (err < 0) goto out_free_cs; - gpmc_update_nand_reg(&gpmc_nand_data->reg, gpmc_nand_data->cs); - if (!gpmc_hwecc_bch_capable(gpmc_nand_data->ecc_opt)) { pr_err("omap2-nand: Unsupported NAND ECC scheme selected\n"); err = -EINVAL; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 0749ca1a1456..cba9bf0adba1 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -28,6 +28,7 @@ #include #include +#include #include #define DRIVER_NAME "omap2-nand" @@ -168,7 +169,9 @@ struct omap_nand_info { } iomode; u_char *buf; int buf_len; + /* Interface to GPMC */ struct gpmc_nand_regs reg; + struct gpmc_nand_ops *ops; /* generated at runtime depending on ECC algorithm and layout selected */ struct nand_ecclayout oobinfo; /* fields specific for BCHx_HW ECC scheme */ @@ -1665,9 +1668,13 @@ static int omap_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); + info->ops = gpmc_omap_get_nand_ops(&info->reg, info->gpmc_cs); + if (!info->ops) { + dev_err(&pdev->dev, "Failed to get GPMC->NAND interface\n"); + return -ENODEV; + } info->pdev = pdev; info->gpmc_cs = pdata->cs; - info->reg = pdata->reg; info->of_node = pdata->of_node; info->ecc_opt = pdata->ecc_opt; nand_chip = &info->nand; diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 090bbab0130a..a067f581e938 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -75,10 +75,12 @@ struct omap_nand_platform_data { enum nand_io xfer_type; int devsize; enum omap_ecc ecc_opt; - struct gpmc_nand_regs reg; /* for passing the partitions */ struct device_node *of_node; struct device_node *elm_of_node; + + /* deprecated */ + struct gpmc_nand_regs reg; }; #endif -- cgit v1.2.3 From c9711ec5250b22fd94e9b34c17c095e001a90e66 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 21 May 2014 07:29:03 +0300 Subject: mtd: nand: omap: Clean up device tree support Move NAND specific device tree parsing to NAND driver. The NAND controller node must have a compatible id, register space resource and interrupt resource. Signed-off-by: Roger Quadros Acked-by: Brian Norris Acked-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc-nand.c | 5 +- drivers/memory/omap-gpmc.c | 143 +++++++-------------------- drivers/mtd/nand/omap2.c | 134 +++++++++++++++++++++---- include/linux/platform_data/mtd-nand-omap2.h | 3 +- 4 files changed, 153 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 04e6998c1529..f6ac027f3c3b 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -97,10 +97,7 @@ int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data, gpmc_nand_res[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT); memset(&s, 0, sizeof(struct gpmc_settings)); - if (gpmc_nand_data->of_node) - gpmc_read_settings_dt(gpmc_nand_data->of_node, &s); - else - gpmc_set_legacy(gpmc_nand_data, &s); + gpmc_set_legacy(gpmc_nand_data, &s); s.device_nand = true; diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index e28d6bc2500a..8dc6e3b1c44a 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -1852,105 +1851,6 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np, of_property_read_bool(np, "gpmc,time-para-granularity"); } -#if IS_ENABLED(CONFIG_MTD_NAND) - -static const char * const nand_xfer_types[] = { - [NAND_OMAP_PREFETCH_POLLED] = "prefetch-polled", - [NAND_OMAP_POLLED] = "polled", - [NAND_OMAP_PREFETCH_DMA] = "prefetch-dma", - [NAND_OMAP_PREFETCH_IRQ] = "prefetch-irq", -}; - -static int gpmc_probe_nand_child(struct platform_device *pdev, - struct device_node *child) -{ - u32 val; - const char *s; - struct gpmc_timings gpmc_t; - struct omap_nand_platform_data *gpmc_nand_data; - - if (of_property_read_u32(child, "reg", &val) < 0) { - dev_err(&pdev->dev, "%s has no 'reg' property\n", - child->full_name); - return -ENODEV; - } - - gpmc_nand_data = devm_kzalloc(&pdev->dev, sizeof(*gpmc_nand_data), - GFP_KERNEL); - if (!gpmc_nand_data) - return -ENOMEM; - - gpmc_nand_data->cs = val; - gpmc_nand_data->of_node = child; - - /* Detect availability of ELM module */ - gpmc_nand_data->elm_of_node = of_parse_phandle(child, "ti,elm-id", 0); - if (gpmc_nand_data->elm_of_node == NULL) - gpmc_nand_data->elm_of_node = - of_parse_phandle(child, "elm_id", 0); - - /* select ecc-scheme for NAND */ - if (of_property_read_string(child, "ti,nand-ecc-opt", &s)) { - pr_err("%s: ti,nand-ecc-opt not found\n", __func__); - return -ENODEV; - } - - if (!strcmp(s, "sw")) - gpmc_nand_data->ecc_opt = OMAP_ECC_HAM1_CODE_SW; - else if (!strcmp(s, "ham1") || - !strcmp(s, "hw") || !strcmp(s, "hw-romcode")) - gpmc_nand_data->ecc_opt = - OMAP_ECC_HAM1_CODE_HW; - else if (!strcmp(s, "bch4")) - if (gpmc_nand_data->elm_of_node) - gpmc_nand_data->ecc_opt = - OMAP_ECC_BCH4_CODE_HW; - else - gpmc_nand_data->ecc_opt = - OMAP_ECC_BCH4_CODE_HW_DETECTION_SW; - else if (!strcmp(s, "bch8")) - if (gpmc_nand_data->elm_of_node) - gpmc_nand_data->ecc_opt = - OMAP_ECC_BCH8_CODE_HW; - else - gpmc_nand_data->ecc_opt = - OMAP_ECC_BCH8_CODE_HW_DETECTION_SW; - else if (!strcmp(s, "bch16")) - if (gpmc_nand_data->elm_of_node) - gpmc_nand_data->ecc_opt = - OMAP_ECC_BCH16_CODE_HW; - else - pr_err("%s: BCH16 requires ELM support\n", __func__); - else - pr_err("%s: ti,nand-ecc-opt invalid value\n", __func__); - - /* select data transfer mode for NAND controller */ - if (!of_property_read_string(child, "ti,nand-xfer-type", &s)) - for (val = 0; val < ARRAY_SIZE(nand_xfer_types); val++) - if (!strcasecmp(s, nand_xfer_types[val])) { - gpmc_nand_data->xfer_type = val; - break; - } - - gpmc_nand_data->flash_bbt = of_get_nand_on_flash_bbt(child); - - val = of_get_nand_bus_width(child); - if (val == 16) - gpmc_nand_data->devsize = NAND_BUSWIDTH_16; - - gpmc_read_timings_dt(child, &gpmc_t); - gpmc_nand_init(gpmc_nand_data, &gpmc_t); - - return 0; -} -#else -static int gpmc_probe_nand_child(struct platform_device *pdev, - struct device_node *child) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_MTD_ONENAND) static int gpmc_probe_onenand_child(struct platform_device *pdev, struct device_node *child) @@ -2069,9 +1969,42 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, goto err; } - ret = of_property_read_u32(child, "bank-width", &gpmc_s.device_width); - if (ret < 0) - goto err; + if (of_node_cmp(child->name, "nand") == 0) { + /* Warn about older DT blobs with no compatible property */ + if (!of_property_read_bool(child, "compatible")) { + dev_warn(&pdev->dev, + "Incompatible NAND node: missing compatible"); + ret = -EINVAL; + goto err; + } + } + + if (of_device_is_compatible(child, "ti,omap2-nand")) { + /* NAND specific setup */ + val = of_get_nand_bus_width(child); + switch (val) { + case 8: + gpmc_s.device_width = GPMC_DEVWIDTH_8BIT; + break; + case 16: + gpmc_s.device_width = GPMC_DEVWIDTH_16BIT; + break; + default: + dev_err(&pdev->dev, "%s: invalid 'nand-bus-width'\n", + child->name); + ret = -EINVAL; + goto err; + } + + /* disable write protect */ + gpmc_configure(GPMC_CONFIG_WP, 0); + gpmc_s.device_nand = true; + } else { + ret = of_property_read_u32(child, "bank-width", + &gpmc_s.device_width); + if (ret < 0) + goto err; + } gpmc_cs_show_timings(cs, "before gpmc_cs_program_settings"); ret = gpmc_cs_program_settings(cs, &gpmc_s); @@ -2155,9 +2088,7 @@ static int gpmc_probe_dt(struct platform_device *pdev) if (!child->name) continue; - if (of_node_cmp(child->name, "nand") == 0) - ret = gpmc_probe_nand_child(pdev, child); - else if (of_node_cmp(child->name, "onenand") == 0) + if (of_node_cmp(child->name, "onenand") == 0) ret = gpmc_probe_onenand_child(pdev, child); else ret = gpmc_probe_generic_child(pdev, child); diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 7e4e263c7d9c..35b8f3359c17 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -176,11 +177,11 @@ struct omap_nand_info { /* Interface to GPMC */ struct gpmc_nand_regs reg; struct gpmc_nand_ops *ops; + bool flash_bbt; /* generated at runtime depending on ECC algorithm and layout selected */ struct nand_ecclayout oobinfo; /* fields specific for BCHx_HW ECC scheme */ struct device *elm_dev; - struct device_node *of_node; }; static inline struct omap_nand_info *mtd_to_omap(struct mtd_info *mtd) @@ -1643,10 +1644,86 @@ static bool omap2_nand_ecc_check(struct omap_nand_info *info, return true; } +static const char * const nand_xfer_types[] = { + [NAND_OMAP_PREFETCH_POLLED] = "prefetch-polled", + [NAND_OMAP_POLLED] = "polled", + [NAND_OMAP_PREFETCH_DMA] = "prefetch-dma", + [NAND_OMAP_PREFETCH_IRQ] = "prefetch-irq", +}; + +static int omap_get_dt_info(struct device *dev, struct omap_nand_info *info) +{ + struct device_node *child = dev->of_node; + int i; + const char *s; + u32 cs; + + if (of_property_read_u32(child, "reg", &cs) < 0) { + dev_err(dev, "reg not found in DT\n"); + return -EINVAL; + } + + info->gpmc_cs = cs; + + /* detect availability of ELM module. Won't be present pre-OMAP4 */ + info->elm_of_node = of_parse_phandle(child, "ti,elm-id", 0); + if (!info->elm_of_node) + dev_dbg(dev, "ti,elm-id not in DT\n"); + + /* select ecc-scheme for NAND */ + if (of_property_read_string(child, "ti,nand-ecc-opt", &s)) { + dev_err(dev, "ti,nand-ecc-opt not found\n"); + return -EINVAL; + } + + if (!strcmp(s, "sw")) { + info->ecc_opt = OMAP_ECC_HAM1_CODE_SW; + } else if (!strcmp(s, "ham1") || + !strcmp(s, "hw") || !strcmp(s, "hw-romcode")) { + info->ecc_opt = OMAP_ECC_HAM1_CODE_HW; + } else if (!strcmp(s, "bch4")) { + if (info->elm_of_node) + info->ecc_opt = OMAP_ECC_BCH4_CODE_HW; + else + info->ecc_opt = OMAP_ECC_BCH4_CODE_HW_DETECTION_SW; + } else if (!strcmp(s, "bch8")) { + if (info->elm_of_node) + info->ecc_opt = OMAP_ECC_BCH8_CODE_HW; + else + info->ecc_opt = OMAP_ECC_BCH8_CODE_HW_DETECTION_SW; + } else if (!strcmp(s, "bch16")) { + info->ecc_opt = OMAP_ECC_BCH16_CODE_HW; + } else { + dev_err(dev, "unrecognized value for ti,nand-ecc-opt\n"); + return -EINVAL; + } + + /* select data transfer mode */ + if (!of_property_read_string(child, "ti,nand-xfer-type", &s)) { + for (i = 0; i < ARRAY_SIZE(nand_xfer_types); i++) { + if (!strcasecmp(s, nand_xfer_types[i])) { + info->xfer_type = i; + goto next; + } + } + + dev_err(dev, "unrecognized value for ti,nand-xfer-type\n"); + return -EINVAL; + } + +next: + of_get_nand_on_flash_bbt(child); + + if (of_get_nand_bus_width(child) == 16) + info->devsize = NAND_BUSWIDTH_16; + + return 0; +} + static int omap_nand_probe(struct platform_device *pdev) { struct omap_nand_info *info; - struct omap_nand_platform_data *pdata; + struct omap_nand_platform_data *pdata = NULL; struct mtd_info *mtd; struct nand_chip *nand_chip; struct nand_ecclayout *ecclayout; @@ -1656,39 +1733,47 @@ static int omap_nand_probe(struct platform_device *pdev) unsigned sig; unsigned oob_index; struct resource *res; - - pdata = dev_get_platdata(&pdev->dev); - if (pdata == NULL) { - dev_err(&pdev->dev, "platform data missing\n"); - return -ENODEV; - } + struct device *dev = &pdev->dev; info = devm_kzalloc(&pdev->dev, sizeof(struct omap_nand_info), GFP_KERNEL); if (!info) return -ENOMEM; - platform_set_drvdata(pdev, info); + info->pdev = pdev; + if (dev->of_node) { + if (omap_get_dt_info(dev, info)) + return -EINVAL; + } else { + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "platform data missing\n"); + return -EINVAL; + } + + info->gpmc_cs = pdata->cs; + info->reg = pdata->reg; + info->ecc_opt = pdata->ecc_opt; + info->dev_ready = pdata->dev_ready; + info->xfer_type = pdata->xfer_type; + info->devsize = pdata->devsize; + info->elm_of_node = pdata->elm_of_node; + info->flash_bbt = pdata->flash_bbt; + } + + platform_set_drvdata(pdev, info); info->ops = gpmc_omap_get_nand_ops(&info->reg, info->gpmc_cs); if (!info->ops) { dev_err(&pdev->dev, "Failed to get GPMC->NAND interface\n"); return -ENODEV; } - info->pdev = pdev; - info->gpmc_cs = pdata->cs; - info->of_node = pdata->of_node; - info->ecc_opt = pdata->ecc_opt; - info->dev_ready = pdata->dev_ready; - info->xfer_type = pdata->xfer_type; - info->devsize = pdata->devsize; - info->elm_of_node = pdata->elm_of_node; nand_chip = &info->nand; mtd = nand_to_mtd(nand_chip); mtd->dev.parent = &pdev->dev; nand_chip->ecc.priv = NULL; - nand_set_flash_node(nand_chip, pdata->of_node); + nand_set_flash_node(nand_chip, dev->of_node); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res); @@ -1717,7 +1802,7 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->chip_delay = 50; } - if (pdata->flash_bbt) + if (info->flash_bbt) nand_chip->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB; else nand_chip->options |= NAND_SKIP_BBTSCAN; @@ -2035,7 +2120,10 @@ scan_tail: goto return_error; } - mtd_device_register(mtd, pdata->parts, pdata->nr_parts); + if (dev->of_node) + mtd_device_register(mtd, NULL, 0); + else + mtd_device_register(mtd, pdata->parts, pdata->nr_parts); platform_set_drvdata(pdev, mtd); @@ -2066,11 +2154,17 @@ static int omap_nand_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id omap_nand_ids[] = { + { .compatible = "ti,omap2-nand", }, + {}, +}; + static struct platform_driver omap_nand_driver = { .probe = omap_nand_probe, .remove = omap_nand_remove, .driver = { .name = DRIVER_NAME, + .of_match_table = of_match_ptr(omap_nand_ids), }, }; diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index a067f581e938..ff27e5a77e03 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -76,11 +76,10 @@ struct omap_nand_platform_data { int devsize; enum omap_ecc ecc_opt; - /* for passing the partitions */ - struct device_node *of_node; struct device_node *elm_of_node; /* deprecated */ struct gpmc_nand_regs reg; + struct device_node *of_node; }; #endif -- cgit v1.2.3 From 9e6946215dbd9803e8b511928c9f61f3a49e2c58 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 7 Aug 2015 10:38:13 +0300 Subject: memory: omap-gpmc: Prevent GPMC_STATUS from being accessed via gpmc_regs GPMC_STATUS register is private to the GPMC module and must not be accessed directly by NAND driver through the gpmc_regs. They must use gpmc_omap_get_nand_ops() instead. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- drivers/memory/omap-gpmc.c | 2 +- include/linux/platform_data/mtd-nand-omap2.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index ea9c89747950..33d69b1e4c31 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1081,7 +1081,7 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) { int i; - reg->gpmc_status = gpmc_base + GPMC_STATUS; + reg->gpmc_status = NULL; /* deprecated */ reg->gpmc_nand_command = gpmc_base + GPMC_CS0_OFFSET + GPMC_CS_NAND_COMMAND + GPMC_CS_SIZE * cs; reg->gpmc_nand_address = gpmc_base + GPMC_CS0_OFFSET + diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index ff27e5a77e03..7f6de5377f80 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -45,7 +45,6 @@ enum omap_ecc { }; struct gpmc_nand_regs { - void __iomem *gpmc_status; void __iomem *gpmc_nand_command; void __iomem *gpmc_nand_address; void __iomem *gpmc_nand_data; @@ -64,6 +63,8 @@ struct gpmc_nand_regs { void __iomem *gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER]; + /* Deprecated. Do not use */ + void __iomem *gpmc_status; }; struct omap_nand_platform_data { -- cgit v1.2.3 From 10f22ee367c4aff7841da6a83c10445d7d6328d9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 6 Aug 2015 17:39:35 +0300 Subject: mtd: nand: omap2: Implement NAND ready using gpiolib The GPMC WAIT pin status are now available over gpiolib. Update the omap_dev_ready() function to use gpio instead of directly accessing GPMC register space. Signed-off-by: Roger Quadros Acked-by: Brian Norris Acked-by: Boris Brezillon Acked-by: Tony Lindgren --- .../devicetree/bindings/mtd/gpmc-nand.txt | 2 ++ drivers/mtd/nand/omap2.c | 29 ++++++++++++++-------- include/linux/platform_data/mtd-nand-omap2.h | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt index ff3215d20343..3ee7e202657c 100644 --- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt +++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt @@ -48,6 +48,7 @@ Optional properties: locating ECC errors for BCHx algorithms. SoC devices which have ELM hardware engines should specify this device node in .dtsi Using ELM for ECC error correction frees some CPU cycles. + - rb-gpios: GPIO specifier for the ready/busy# pin. For inline partition table parsing (optional): @@ -78,6 +79,7 @@ Example for an AM33xx board: nand-bus-width = <16>; ti,nand-ecc-opt = "bch8"; ti,nand-xfer-type = "polled"; + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 35b8f3359c17..e0b2b2f0fbde 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -182,6 +183,8 @@ struct omap_nand_info { struct nand_ecclayout oobinfo; /* fields specific for BCHx_HW ECC scheme */ struct device *elm_dev; + /* NAND ready gpio */ + struct gpio_desc *ready_gpiod; }; static inline struct omap_nand_info *mtd_to_omap(struct mtd_info *mtd) @@ -1023,21 +1026,16 @@ static int omap_wait(struct mtd_info *mtd, struct nand_chip *chip) } /** - * omap_dev_ready - calls the platform specific dev_ready function + * omap_dev_ready - checks the NAND Ready GPIO line * @mtd: MTD device structure + * + * Returns true if ready and false if busy. */ static int omap_dev_ready(struct mtd_info *mtd) { - unsigned int val = 0; struct omap_nand_info *info = mtd_to_omap(mtd); - val = readl(info->reg.gpmc_status); - - if ((val & 0x100) == 0x100) { - return 1; - } else { - return 0; - } + return gpiod_get_value(info->ready_gpiod); } /** @@ -1755,7 +1753,9 @@ static int omap_nand_probe(struct platform_device *pdev) info->gpmc_cs = pdata->cs; info->reg = pdata->reg; info->ecc_opt = pdata->ecc_opt; - info->dev_ready = pdata->dev_ready; + if (pdata->dev_ready) + dev_info(&pdev->dev, "pdata->dev_ready is deprecated\n"); + info->xfer_type = pdata->xfer_type; info->devsize = pdata->devsize; info->elm_of_node = pdata->elm_of_node; @@ -1787,6 +1787,13 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->IO_ADDR_W = nand_chip->IO_ADDR_R; nand_chip->cmd_ctrl = omap_hwcontrol; + info->ready_gpiod = devm_gpiod_get_optional(&pdev->dev, "rb", + GPIOD_IN); + if (IS_ERR(info->ready_gpiod)) { + dev_err(dev, "failed to get ready gpio\n"); + return PTR_ERR(info->ready_gpiod); + } + /* * If RDY/BSY line is connected to OMAP then use the omap ready * function and the generic nand_wait function which reads the status @@ -1794,7 +1801,7 @@ static int omap_nand_probe(struct platform_device *pdev) * chip delay which is slightly more than tR (AC Timing) of the NAND * device and read status register until you get a failure or success */ - if (info->dev_ready) { + if (info->ready_gpiod) { nand_chip->dev_ready = omap_dev_ready; nand_chip->chip_delay = 0; } else { diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 7f6de5377f80..17d57a18bac5 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -71,7 +71,6 @@ struct omap_nand_platform_data { int cs; struct mtd_partition *parts; int nr_parts; - bool dev_ready; bool flash_bbt; enum nand_io xfer_type; int devsize; @@ -82,5 +81,6 @@ struct omap_nand_platform_data { /* deprecated */ struct gpmc_nand_regs reg; struct device_node *of_node; + bool dev_ready; }; #endif -- cgit v1.2.3 From 6eae29e7e7144d01a6d6af111d232b36cdd30f51 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 2 Apr 2016 10:54:56 -0500 Subject: crypto: doc - document correct return value for request allocation Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- Documentation/DocBook/crypto-API.tmpl | 6 +++--- include/crypto/aead.h | 3 +-- include/crypto/hash.h | 3 +-- include/crypto/skcipher.h | 3 +-- include/linux/crypto.h | 3 +-- 5 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl index 348619fcafb8..d55dc5a39bad 100644 --- a/Documentation/DocBook/crypto-API.tmpl +++ b/Documentation/DocBook/crypto-API.tmpl @@ -1936,9 +1936,9 @@ static int test_skcipher(void) } req = skcipher_request_alloc(skcipher, GFP_KERNEL); - if (IS_ERR(req)) { - pr_info("could not allocate request queue\n"); - ret = PTR_ERR(req); + if (!req) { + pr_info("could not allocate skcipher request\n"); + ret = -ENOMEM; goto out; } diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 957bb8763219..75174f80a106 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -405,8 +405,7 @@ static inline void aead_request_set_tfm(struct aead_request *req, * encrypt and decrypt API calls. During the allocation, the provided aead * handle is registered in the request data structure. * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. + * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, gfp_t gfp) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 1969f1416658..26605888a199 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -547,8 +547,7 @@ static inline void ahash_request_set_tfm(struct ahash_request *req, * the allocation, the provided ahash handle * is registered in the request data structure. * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. + * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct ahash_request *ahash_request_alloc( struct crypto_ahash *tfm, gfp_t gfp) diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 905490c1da89..0f987f50bb52 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -425,8 +425,7 @@ static inline struct skcipher_request *skcipher_request_cast( * encrypt and decrypt API calls. During the allocation, the provided skcipher * handle is registered in the request data structure. * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. + * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct skcipher_request *skcipher_request_alloc( struct crypto_skcipher *tfm, gfp_t gfp) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 99c94899ad0f..6e28c895c376 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -948,8 +948,7 @@ static inline struct ablkcipher_request *ablkcipher_request_cast( * encrypt and decrypt API calls. During the allocation, the provided ablkcipher * handle is registered in the request data structure. * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. + * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct ablkcipher_request *ablkcipher_request_alloc( struct crypto_ablkcipher *tfm, gfp_t gfp) -- cgit v1.2.3 From 675f10bde6cc3874632a8f684df2a8a2a8ace76e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Feb 2016 18:29:18 +0800 Subject: f2fs: fix to convert inline directory correctly With below serials, we will lose parts of dirents: 1) mount f2fs with inline_dentry option 2) echo 1 > /sys/fs/f2fs/sdX/dir_level 3) mkdir dir 4) touch 180 files named [1-180] in dir 5) touch 181 in dir 6) echo 3 > /proc/sys/vm/drop_caches 7) ll dir ls: cannot access 2: No such file or directory ls: cannot access 4: No such file or directory ls: cannot access 5: No such file or directory ls: cannot access 6: No such file or directory ls: cannot access 8: No such file or directory ls: cannot access 9: No such file or directory ... total 360 drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./ drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../ -rw-r--r-- 1 root root 0 Feb 19 15:12 1 -rw-r--r-- 1 root root 0 Feb 19 15:12 10 -rw-r--r-- 1 root root 0 Feb 19 15:12 100 -????????? ? ? ? ? ? 101 -????????? ? ? ? ? ? 102 -????????? ? ? ? ? ? 103 ... The reason is: when doing the inline dir conversion, we didn't consider that directory has hierarchical hash structure which can be configured through sysfs interface 'dir_level'. By default, dir_level of directory inode is 0, it means we have one bucket in hash table located in first level, all dirents will be hashed in this bucket, so it has no problem for us to do the duplication simply between inline dentry page and converted normal dentry page. However, if we configured dir_level with the value N (greater than 0), it will expand the bucket number of first level hash table by 2^N - 1, it hashs dirents into different buckets according their hash value, if we still move all dirents to first bucket, it makes incorrent locating for inline dirents, the result is, although we can iterate all dirents through ->readdir, we can't stat some of them in ->lookup which based on hash table searching. This patch fixes this issue by rehashing dirents into correct position when converting inline directory. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 87 ++++++++++++++++++++++++--------------------- fs/f2fs/f2fs.h | 4 ++- fs/f2fs/inline.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/f2fs_fs.h | 2 ++ 4 files changed, 144 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index af819571bce7..e90380d82214 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -48,7 +48,6 @@ unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_SYMLINK] = DT_LNK, }; -#define S_SHIFT 12 static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, @@ -64,6 +63,13 @@ void set_de_type(struct f2fs_dir_entry *de, umode_t mode) de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } +unsigned char get_de_type(struct f2fs_dir_entry *de) +{ + if (de->file_type < F2FS_FT_MAX) + return f2fs_filetype_table[de->file_type]; + return DT_UNKNOWN; +} + static unsigned long dir_block_index(unsigned int level, int dir_level, unsigned int idx) { @@ -509,11 +515,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, } } -/* - * Caller should grab and release a rwsem by calling f2fs_lock_op() and - * f2fs_unlock_op(). - */ -int __f2fs_add_link(struct inode *dir, const struct qstr *name, +int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; @@ -526,28 +528,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; struct page *page = NULL; - struct fscrypt_name fname; - struct qstr new_name; - int slots, err; - - err = fscrypt_setup_filename(dir, name, 0, &fname); - if (err) - return err; - - new_name.name = fname_name(&fname); - new_name.len = fname_len(&fname); - - if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); - if (!err || err != -EAGAIN) - goto out; - else - err = 0; - } + int slots, err = 0; level = 0; - slots = GET_DENTRY_SLOTS(new_name.len); - dentry_hash = f2fs_dentry_hash(&new_name); + slots = GET_DENTRY_SLOTS(new_name->len); + dentry_hash = f2fs_dentry_hash(new_name); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -556,10 +541,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } start: - if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { - err = -ENOSPC; - goto out; - } + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) + return -ENOSPC; /* Increase the depth, if required */ if (level == current_depth) @@ -573,10 +556,8 @@ start: for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) { - err = PTR_ERR(dentry_page); - goto out; - } + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, @@ -596,7 +577,7 @@ add_dentry: if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, &new_name, NULL); + page = init_inode_metadata(inode, dir, new_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -606,7 +587,7 @@ add_dentry: } make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); - f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -628,7 +609,34 @@ fail: } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); -out: + + return err; +} + +/* + * Caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op(). + */ +int __f2fs_add_link(struct inode *dir, const struct qstr *name, + struct inode *inode, nid_t ino, umode_t mode) +{ + struct fscrypt_name fname; + struct qstr new_name; + int err; + + err = fscrypt_setup_filename(dir, name, 0, &fname); + if (err) + return err; + + new_name.name = fname_name(&fname); + new_name.len = fname_len(&fname); + + err = -EAGAIN; + if (f2fs_has_inline_dentry(dir)) + err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); + if (err == -EAGAIN) + err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode); + fscrypt_free_filename(&fname); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; @@ -792,10 +800,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, continue; } - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; + d_type = get_de_type(de); de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e1c07b60f301..3f1551395244 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1711,7 +1711,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, umode_t); - +unsigned char get_de_type(struct f2fs_dir_entry *); struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, f2fs_hash_t, int *, struct f2fs_dentry_ptr *); bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, @@ -1732,6 +1732,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, int update_dent_inode(struct inode *, struct inode *, const struct qstr *); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); +int f2fs_add_regular_entry(struct inode *, const struct qstr *, + struct inode *, nid_t, umode_t); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, umode_t); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a2fbe6f427d3..772056587eb9 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -355,7 +355,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, * NOTE: ipage is grabbed by caller, but if any error occurs, we should * release ipage in this function. */ -static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, +static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *inline_dentry) { struct page *page; @@ -416,6 +416,98 @@ out: return err; } +static int f2fs_add_inline_entries(struct inode *dir, + struct f2fs_inline_dentry *inline_dentry) +{ + struct f2fs_dentry_ptr d; + unsigned long bit_pos = 0; + int err = 0; + + make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + + while (bit_pos < d.max) { + struct f2fs_dir_entry *de; + struct qstr new_name; + nid_t ino; + umode_t fake_mode; + + if (!test_bit_le(bit_pos, d.bitmap)) { + bit_pos++; + continue; + } + + de = &d.dentry[bit_pos]; + new_name.name = d.filename[bit_pos]; + new_name.len = de->name_len; + + ino = le32_to_cpu(de->ino); + fake_mode = get_de_type(de) << S_SHIFT; + + err = f2fs_add_regular_entry(dir, &new_name, NULL, + ino, fake_mode); + if (err) + goto punch_dentry_pages; + + if (unlikely(!de->name_len)) + d.max = -1; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + } + return 0; +punch_dentry_pages: + truncate_inode_pages(&dir->i_data, 0); + truncate_blocks(dir, 0, false); + remove_dirty_inode(dir); + return err; +} + +static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct f2fs_inline_dentry *backup_dentry; + int err; + + backup_dentry = kmalloc(sizeof(struct f2fs_inline_dentry), + GFP_F2FS_ZERO); + if (!backup_dentry) + return -ENOMEM; + + memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); + truncate_inline_inode(ipage, 0); + + unlock_page(ipage); + + err = f2fs_add_inline_entries(dir, backup_dentry); + if (err) + goto recover; + + lock_page(ipage); + + stat_dec_inline_dir(dir); + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + update_inode(dir, ipage); + kfree(backup_dentry); + return 0; +recover: + lock_page(ipage); + memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); + i_size_write(dir, MAX_INLINE_DATA); + update_inode(dir, ipage); + f2fs_put_page(ipage, 1); + + kfree(backup_dentry); + return err; +} + +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + if (!F2FS_I(dir)->i_dir_level) + return f2fs_move_inline_dirents(dir, ipage, inline_dentry); + else + return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); +} + int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b90e9bdbd1dd..4c02c6521fef 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -508,4 +508,6 @@ enum { F2FS_FT_MAX }; +#define S_SHIFT 12 + #endif /* _LINUX_F2FS_FS_H */ -- cgit v1.2.3 From 298535c00a2cbcd59e38f8f1c0c9ae7b9911e946 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Apr 2016 15:50:25 -0700 Subject: of, numa: Add NUMA of binding implementation. Add device tree parsing for NUMA topology using device "numa-node-id" property in distance-map and cpu nodes. This is a complete rewrite of a previous patch by: Ganapatrao Kulkarni Signed-off-by: David Daney Acked-by: Rob Herring Signed-off-by: Will Deacon --- drivers/of/Kconfig | 3 + drivers/of/Makefile | 1 + drivers/of/of_numa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 9 +++ 4 files changed, 224 insertions(+) create mode 100644 drivers/of/of_numa.c (limited to 'include/linux') diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index e2a48415d969..b3bec3aaa45d 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -112,4 +112,7 @@ config OF_OVERLAY While this option is selected automatically when needed, you can enable it manually to improve device tree unit test coverage. +config OF_NUMA + bool + endif # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 156c072b3117..bee3fa96b981 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -14,5 +14,6 @@ obj-$(CONFIG_OF_MTD) += of_mtd.o obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o obj-$(CONFIG_OF_RESOLVE) += resolver.o obj-$(CONFIG_OF_OVERLAY) += overlay.o +obj-$(CONFIG_OF_NUMA) += of_numa.o obj-$(CONFIG_OF_UNITTEST) += unittest-data/ diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c new file mode 100644 index 000000000000..0f2784bc1874 --- /dev/null +++ b/drivers/of/of_numa.c @@ -0,0 +1,211 @@ +/* + * OF NUMA Parsing support. + * + * Copyright (C) 2015 - 2016 Cavium Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +/* define default numa node to 0 */ +#define DEFAULT_NODE 0 + +/* + * Even though we connect cpus to numa domains later in SMP + * init, we need to know the node ids now for all cpus. +*/ +static void __init of_numa_parse_cpu_nodes(void) +{ + u32 nid; + int r; + struct device_node *cpus; + struct device_node *np = NULL; + + cpus = of_find_node_by_path("/cpus"); + if (!cpus) + return; + + for_each_child_of_node(cpus, np) { + /* Skip things that are not CPUs */ + if (of_node_cmp(np->type, "cpu") != 0) + continue; + + r = of_property_read_u32(np, "numa-node-id", &nid); + if (r) + continue; + + pr_debug("NUMA: CPU on %u\n", nid); + if (nid >= MAX_NUMNODES) + pr_warn("NUMA: Node id %u exceeds maximum value\n", + nid); + else + node_set(nid, numa_nodes_parsed); + } +} + +static int __init of_numa_parse_memory_nodes(void) +{ + struct device_node *np = NULL; + struct resource rsrc; + u32 nid; + int r = 0; + + for (;;) { + np = of_find_node_by_type(np, "memory"); + if (!np) + break; + + r = of_property_read_u32(np, "numa-node-id", &nid); + if (r == -EINVAL) + /* + * property doesn't exist if -EINVAL, continue + * looking for more memory nodes with + * "numa-node-id" property + */ + continue; + else if (r) + /* some other error */ + break; + + r = of_address_to_resource(np, 0, &rsrc); + if (r) { + pr_err("NUMA: bad reg property in memory node\n"); + break; + } + + pr_debug("NUMA: base = %llx len = %llx, node = %u\n", + rsrc.start, rsrc.end - rsrc.start + 1, nid); + + r = numa_add_memblk(nid, rsrc.start, + rsrc.end - rsrc.start + 1); + if (r) + break; + } + of_node_put(np); + + return r; +} + +static int __init of_numa_parse_distance_map_v1(struct device_node *map) +{ + const __be32 *matrix; + int entry_count; + int i; + + pr_info("NUMA: parsing numa-distance-map-v1\n"); + + matrix = of_get_property(map, "distance-matrix", NULL); + if (!matrix) { + pr_err("NUMA: No distance-matrix property in distance-map\n"); + return -EINVAL; + } + + entry_count = of_property_count_u32_elems(map, "distance-matrix"); + if (entry_count <= 0) { + pr_err("NUMA: Invalid distance-matrix\n"); + return -EINVAL; + } + + for (i = 0; i + 2 < entry_count; i += 3) { + u32 nodea, nodeb, distance; + + nodea = of_read_number(matrix, 1); + matrix++; + nodeb = of_read_number(matrix, 1); + matrix++; + distance = of_read_number(matrix, 1); + matrix++; + + numa_set_distance(nodea, nodeb, distance); + pr_debug("NUMA: distance[node%d -> node%d] = %d\n", + nodea, nodeb, distance); + + /* Set default distance of node B->A same as A->B */ + if (nodeb > nodea) + numa_set_distance(nodeb, nodea, distance); + } + + return 0; +} + +static int __init of_numa_parse_distance_map(void) +{ + int ret = 0; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, + "numa-distance-map-v1"); + if (np) + ret = of_numa_parse_distance_map_v1(np); + + of_node_put(np); + return ret; +} + +int of_node_to_nid(struct device_node *device) +{ + struct device_node *np; + u32 nid; + int r = -ENODATA; + + np = of_node_get(device); + + while (np) { + struct device_node *parent; + + r = of_property_read_u32(np, "numa-node-id", &nid); + /* + * -EINVAL indicates the property was not found, and + * we walk up the tree trying to find a parent with a + * "numa-node-id". Any other type of error indicates + * a bad device tree and we give up. + */ + if (r != -EINVAL) + break; + + parent = of_get_parent(np); + of_node_put(np); + np = parent; + } + if (np && r) + pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n", + np->name); + of_node_put(np); + + if (!r) { + if (nid >= MAX_NUMNODES) + pr_warn("NUMA: Node id %u exceeds maximum value\n", + nid); + else + return nid; + } + + return NUMA_NO_NODE; +} +EXPORT_SYMBOL(of_node_to_nid); + +int __init of_numa_init(void) +{ + int r; + + of_numa_parse_cpu_nodes(); + r = of_numa_parse_memory_nodes(); + if (r) + return r; + return of_numa_parse_distance_map(); +} diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..76f07c84040f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -685,6 +685,15 @@ static inline int of_node_to_nid(struct device_node *device) } #endif +#ifdef CONFIG_OF_NUMA +extern int of_numa_init(void); +#else +static inline int of_numa_init(void) +{ + return -ENOSYS; +} +#endif + static inline struct device_node *of_find_matching_node( struct device_node *from, const struct of_device_id *matches) -- cgit v1.2.3 From 522566376a3f8373fbd5ff75bb8a7a2da701c1a7 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Thu, 14 Apr 2016 11:59:31 +0200 Subject: devcoredump: add scatterlist support Add scatterlist support (dev_coredumpsg) to allow drivers to avoid vmalloc() like dev_coredumpm(), while also avoiding the module reference that the latter function requires. This internally uses dev_coredumpm() with function inside the devcoredump module, requiring removing the const (which touches the driver using it.) Signed-off-by: Aviya Erenfeld Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/base/devcoredump.c | 83 +++++++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 4 +- include/linux/devcoredump.h | 86 ++++++++++++++++++++++--- 3 files changed, 154 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 1bd120a0b084..240374fd1838 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -4,6 +4,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2014 Intel Mobile Communications GmbH + * Copyright(c) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -41,12 +42,12 @@ static bool devcd_disabled; struct devcd_entry { struct device devcd_dev; - const void *data; + void *data; size_t datalen; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen); - void (*free)(const void *data); + void *data, size_t datalen); + void (*free)(void *data); struct delayed_work del_wk; struct device *failing_dev; }; @@ -174,7 +175,7 @@ static struct class devcd_class = { }; static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen) + void *data, size_t datalen) { if (offset > datalen) return -EINVAL; @@ -188,6 +189,11 @@ static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count, return count; } +static void devcd_freev(void *data) +{ + vfree(data); +} + /** * dev_coredumpv - create device coredump with vmalloc data * @dev: the struct device for the crashed device @@ -198,10 +204,10 @@ static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count, * This function takes ownership of the vmalloc'ed data and will free * it when it is no longer used. See dev_coredumpm() for more information. */ -void dev_coredumpv(struct device *dev, const void *data, size_t datalen, +void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp) { - dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, vfree); + dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev); } EXPORT_SYMBOL_GPL(dev_coredumpv); @@ -212,6 +218,44 @@ static int devcd_match_failing(struct device *dev, const void *failing) return devcd->failing_dev == failing; } +/** + * devcd_free_sgtable - free all the memory of the given scatterlist table + * (i.e. both pages and scatterlist instances) + * NOTE: if two tables allocated with devcd_alloc_sgtable and then chained + * using the sg_chain function then that function should be called only once + * on the chained table + * @table: pointer to sg_table to free + */ +static void devcd_free_sgtable(void *data) +{ + _devcd_free_sgtable(data); +} + +/** + * devcd_read_from_table - copy data from sg_table to a given buffer + * and return the number of bytes read + * @buffer: the buffer to copy the data to it + * @buf_len: the length of the buffer + * @data: the scatterlist table to copy from + * @offset: start copy from @offset@ bytes from the head of the data + * in the given scatterlist + * @data_len: the length of the data in the sg_table + */ +static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, + size_t buf_len, void *data, + size_t data_len) +{ + struct scatterlist *table = data; + + if (offset > data_len) + return -EINVAL; + + if (offset + buf_len > data_len) + buf_len = data_len - offset; + return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len, + offset); +} + /** * dev_coredumpm - create device coredump with read/free methods * @dev: the struct device for the crashed device @@ -228,10 +272,10 @@ static int devcd_match_failing(struct device *dev, const void *failing) * function will be called to free the data. */ void dev_coredumpm(struct device *dev, struct module *owner, - const void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen), - void (*free)(const void *data)) + void *data, size_t datalen), + void (*free)(void *data)) { static atomic_t devcd_count = ATOMIC_INIT(0); struct devcd_entry *devcd; @@ -291,6 +335,27 @@ void dev_coredumpm(struct device *dev, struct module *owner, } EXPORT_SYMBOL_GPL(dev_coredumpm); +/** + * dev_coredumpmsg - create device coredump that uses scatterlist as data + * parameter + * @dev: the struct device for the crashed device + * @table: the dump data + * @datalen: length of the data + * @gfp: allocation flags + * + * Creates a new device coredump for the given device. If a previous one hasn't + * been read yet, the new coredump is discarded. The data lifetime is determined + * by the device coredump framework and when it is no longer needed + * it will free the data. + */ +void dev_coredumpsg(struct device *dev, struct scatterlist *table, + size_t datalen, gfp_t gfp) +{ + dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable, + devcd_free_sgtable); +} +EXPORT_SYMBOL_GPL(dev_coredumpsg); + static int __init devcoredump_init(void) { return class_register(&devcd_class); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 4856eac120f6..a4b0581d2275 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -71,7 +71,7 @@ #include "iwl-csr.h" static ssize_t iwl_mvm_read_coredump(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen) + void *data, size_t datalen) { const struct iwl_mvm_dump_ptrs *dump_ptrs = data; ssize_t bytes_read; @@ -104,7 +104,7 @@ static ssize_t iwl_mvm_read_coredump(char *buffer, loff_t offset, size_t count, return bytes_read + bytes_read_trans; } -static void iwl_mvm_free_coredump(const void *data) +static void iwl_mvm_free_coredump(void *data) { const struct iwl_mvm_dump_ptrs *fw_error_dump = data; diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c0a360e99f64..269521f143ac 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -1,3 +1,22 @@ +/* + * This file is provided under the GPLv2 license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + */ #ifndef __DEVCOREDUMP_H #define __DEVCOREDUMP_H @@ -5,17 +24,62 @@ #include #include +#include +#include + +/* + * _devcd_free_sgtable - free all the memory of the given scatterlist table + * (i.e. both pages and scatterlist instances) + * NOTE: if two tables allocated and chained using the sg_chain function then + * this function should be called only once on the first table + * @table: pointer to sg_table to free + */ +static inline void _devcd_free_sgtable(struct scatterlist *table) +{ + int i; + struct page *page; + struct scatterlist *iter; + struct scatterlist *delete_iter; + + /* free pages */ + iter = table; + for_each_sg(table, iter, sg_nents(table), i) { + page = sg_page(iter); + if (page) + __free_page(page); + } + + /* then free all chained tables */ + iter = table; + delete_iter = table; /* always points on a head of a table */ + while (!sg_is_last(iter)) { + iter++; + if (sg_is_chain(iter)) { + iter = sg_chain_ptr(iter); + kfree(delete_iter); + delete_iter = iter; + } + } + + /* free the last table */ + kfree(delete_iter); +} + + #ifdef CONFIG_DEV_COREDUMP -void dev_coredumpv(struct device *dev, const void *data, size_t datalen, +void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); void dev_coredumpm(struct device *dev, struct module *owner, - const void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen), - void (*free)(const void *data)); + void *data, size_t datalen), + void (*free)(void *data)); + +void dev_coredumpsg(struct device *dev, struct scatterlist *table, + size_t datalen, gfp_t gfp); #else -static inline void dev_coredumpv(struct device *dev, const void *data, +static inline void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp) { vfree(data); @@ -23,13 +87,19 @@ static inline void dev_coredumpv(struct device *dev, const void *data, static inline void dev_coredumpm(struct device *dev, struct module *owner, - const void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen), - void (*free)(const void *data)) + void *data, size_t datalen), + void (*free)(void *data)) { free(data); } + +static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, + size_t datalen, gfp_t gfp) +{ + _devcd_free_sgtable(table); +} #endif /* CONFIG_DEV_COREDUMP */ #endif /* __DEVCOREDUMP_H */ -- cgit v1.2.3 From 9b1d6c8950021ab007608d455fc9c398ecd25476 Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Mon, 4 Apr 2016 14:48:11 -0700 Subject: lib: scatterlist: move SG pool code from SCSI driver to lib/sg_pool.c Now it's ready to move the mempool based SG chained allocator code from SCSI driver to lib/sg_pool.c, which will be compiled only based on a Kconfig symbol CONFIG_SG_POOL. SCSI selects CONFIG_SG_POOL. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lin Reviewed-by: Sagi Grimberg Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 1 + drivers/scsi/scsi_lib.c | 137 ----------------------------------- include/linux/scatterlist.h | 25 +++++++ include/scsi/scsi.h | 19 ----- lib/Kconfig | 7 ++ lib/Makefile | 1 + lib/sg_pool.c | 172 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 206 insertions(+), 156 deletions(-) create mode 100644 lib/sg_pool.c (limited to 'include/linux') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 0950567e6269..98e5d51a3346 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -17,6 +17,7 @@ config SCSI tristate "SCSI device support" depends on BLOCK select SCSI_DMA if HAS_DMA + select SG_POOL ---help--- If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or any other SCSI device under Linux, say Y and make sure that you know diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8f776f1e95ce..b920c5dabf60 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -14,8 +14,6 @@ #include #include #include -#include -#include #include #include #include @@ -40,39 +38,6 @@ #include "scsi_logging.h" -#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools) -#define SG_MEMPOOL_SIZE 2 - -struct sg_pool { - size_t size; - char *name; - struct kmem_cache *slab; - mempool_t *pool; -}; - -#define SP(x) { .size = x, "sgpool-" __stringify(x) } -#if (SG_CHUNK_SIZE < 32) -#error SG_CHUNK_SIZE is too small (must be 32 or greater) -#endif -static struct sg_pool sg_pools[] = { - SP(8), - SP(16), -#if (SG_CHUNK_SIZE > 32) - SP(32), -#if (SG_CHUNK_SIZE > 64) - SP(64), -#if (SG_CHUNK_SIZE > 128) - SP(128), -#if (SG_CHUNK_SIZE > 256) -#error SG_CHUNK_SIZE is too large (256 MAX) -#endif -#endif -#endif -#endif - SP(SG_CHUNK_SIZE) -}; -#undef SP - struct kmem_cache *scsi_sdb_cache; /* @@ -553,65 +518,6 @@ void scsi_run_host_queues(struct Scsi_Host *shost) scsi_run_queue(sdev->request_queue); } -static inline unsigned int sg_pool_index(unsigned short nents) -{ - unsigned int index; - - BUG_ON(nents > SG_CHUNK_SIZE); - - if (nents <= 8) - index = 0; - else - index = get_count_order(nents) - 3; - - return index; -} - -static void sg_pool_free(struct scatterlist *sgl, unsigned int nents) -{ - struct sg_pool *sgp; - - sgp = sg_pools + sg_pool_index(nents); - mempool_free(sgl, sgp->pool); -} - -static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask) -{ - struct sg_pool *sgp; - - sgp = sg_pools + sg_pool_index(nents); - return mempool_alloc(sgp->pool, gfp_mask); -} - -static void sg_free_table_chained(struct sg_table *table, bool first_chunk) -{ - if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE) - return; - __sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free); -} - -static int sg_alloc_table_chained(struct sg_table *table, int nents, - struct scatterlist *first_chunk) -{ - int ret; - - BUG_ON(!nents); - - if (first_chunk) { - if (nents <= SG_CHUNK_SIZE) { - table->nents = table->orig_nents = nents; - sg_init_table(table->sgl, nents); - return 0; - } - } - - ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE, - first_chunk, GFP_ATOMIC, sg_pool_alloc); - if (unlikely(ret)) - sg_free_table_chained(table, (bool)first_chunk); - return ret; -} - static void scsi_uninit_cmd(struct scsi_cmnd *cmd) { if (cmd->request->cmd_type == REQ_TYPE_FS) { @@ -2269,8 +2175,6 @@ EXPORT_SYMBOL(scsi_unblock_requests); int __init scsi_init_queue(void) { - int i; - scsi_sdb_cache = kmem_cache_create("scsi_data_buffer", sizeof(struct scsi_data_buffer), 0, 0, NULL); @@ -2279,53 +2183,12 @@ int __init scsi_init_queue(void) return -ENOMEM; } - for (i = 0; i < SG_MEMPOOL_NR; i++) { - struct sg_pool *sgp = sg_pools + i; - int size = sgp->size * sizeof(struct scatterlist); - - sgp->slab = kmem_cache_create(sgp->name, size, 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!sgp->slab) { - printk(KERN_ERR "SCSI: can't init sg slab %s\n", - sgp->name); - goto cleanup_sdb; - } - - sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, - sgp->slab); - if (!sgp->pool) { - printk(KERN_ERR "SCSI: can't init sg mempool %s\n", - sgp->name); - goto cleanup_sdb; - } - } - return 0; - -cleanup_sdb: - for (i = 0; i < SG_MEMPOOL_NR; i++) { - struct sg_pool *sgp = sg_pools + i; - if (sgp->pool) - mempool_destroy(sgp->pool); - if (sgp->slab) - kmem_cache_destroy(sgp->slab); - } - kmem_cache_destroy(scsi_sdb_cache); - - return -ENOMEM; } void scsi_exit_queue(void) { - int i; - kmem_cache_destroy(scsi_sdb_cache); - - for (i = 0; i < SG_MEMPOOL_NR; i++) { - struct sg_pool *sgp = sg_pools + i; - mempool_destroy(sgp->pool); - kmem_cache_destroy(sgp->slab); - } } /** diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 556ec1ea2574..cb3c8fe6acd7 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -285,6 +285,31 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) +/* + * The maximum number of SG segments that we will put inside a + * scatterlist (unless chaining is used). Should ideally fit inside a + * single page, to avoid a higher order allocation. We could define this + * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The + * minimum value is 32 + */ +#define SG_CHUNK_SIZE 128 + +/* + * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit + * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. + */ +#ifdef CONFIG_ARCH_HAS_SG_CHAIN +#define SG_MAX_SEGMENTS 2048 +#else +#define SG_MAX_SEGMENTS SG_CHUNK_SIZE +#endif + +#ifdef CONFIG_SG_POOL +void sg_free_table_chained(struct sg_table *table, bool first_chunk); +int sg_alloc_table_chained(struct sg_table *table, int nents, + struct scatterlist *first_chunk); +#endif + /* * sg page iterator * diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 74dafa75bae7..8ec7c30e35af 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -17,25 +17,6 @@ enum scsi_timeouts { SCSI_DEFAULT_EH_TIMEOUT = 10 * HZ, }; -/* - * The maximum number of SG segments that we will put inside a - * scatterlist (unless chaining is used). Should ideally fit inside a - * single page, to avoid a higher order allocation. We could define this - * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The - * minimum value is 32 - */ -#define SG_CHUNK_SIZE 128 - -/* - * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit - * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. - */ -#ifdef CONFIG_ARCH_HAS_SG_CHAIN -#define SG_MAX_SEGMENTS 2048 -#else -#define SG_MAX_SEGMENTS SG_CHUNK_SIZE -#endif - /* * DIX-capable adapters effectively support infinite chaining for the * protection information scatterlist diff --git a/lib/Kconfig b/lib/Kconfig index 3cca1222578e..61d55bd0ed89 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -523,6 +523,13 @@ config SG_SPLIT a scatterlist. This should be selected by a driver or an API which whishes to split a scatterlist amongst multiple DMA channels. +config SG_POOL + def_bool n + help + Provides a helper to allocate chained scatterlists. This should be + selected by a driver or an API which whishes to allocate chained + scatterlist. + # # sg chaining option # diff --git a/lib/Makefile b/lib/Makefile index 7bd6fd436c97..bf01c2673423 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -178,6 +178,7 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_SG_SPLIT) += sg_split.o +obj-$(CONFIG_SG_POOL) += sg_pool.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o diff --git a/lib/sg_pool.c b/lib/sg_pool.c new file mode 100644 index 000000000000..6dd30615a201 --- /dev/null +++ b/lib/sg_pool.c @@ -0,0 +1,172 @@ +#include +#include +#include +#include + +#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools) +#define SG_MEMPOOL_SIZE 2 + +struct sg_pool { + size_t size; + char *name; + struct kmem_cache *slab; + mempool_t *pool; +}; + +#define SP(x) { .size = x, "sgpool-" __stringify(x) } +#if (SG_CHUNK_SIZE < 32) +#error SG_CHUNK_SIZE is too small (must be 32 or greater) +#endif +static struct sg_pool sg_pools[] = { + SP(8), + SP(16), +#if (SG_CHUNK_SIZE > 32) + SP(32), +#if (SG_CHUNK_SIZE > 64) + SP(64), +#if (SG_CHUNK_SIZE > 128) + SP(128), +#if (SG_CHUNK_SIZE > 256) +#error SG_CHUNK_SIZE is too large (256 MAX) +#endif +#endif +#endif +#endif + SP(SG_CHUNK_SIZE) +}; +#undef SP + +static inline unsigned int sg_pool_index(unsigned short nents) +{ + unsigned int index; + + BUG_ON(nents > SG_CHUNK_SIZE); + + if (nents <= 8) + index = 0; + else + index = get_count_order(nents) - 3; + + return index; +} + +static void sg_pool_free(struct scatterlist *sgl, unsigned int nents) +{ + struct sg_pool *sgp; + + sgp = sg_pools + sg_pool_index(nents); + mempool_free(sgl, sgp->pool); +} + +static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask) +{ + struct sg_pool *sgp; + + sgp = sg_pools + sg_pool_index(nents); + return mempool_alloc(sgp->pool, gfp_mask); +} + +/** + * sg_free_table_chained - Free a previously mapped sg table + * @table: The sg table header to use + * @first_chunk: was first_chunk not NULL in sg_alloc_table_chained? + * + * Description: + * Free an sg table previously allocated and setup with + * sg_alloc_table_chained(). + * + **/ +void sg_free_table_chained(struct sg_table *table, bool first_chunk) +{ + if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE) + return; + __sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free); +} +EXPORT_SYMBOL_GPL(sg_free_table_chained); + +/** + * sg_alloc_table_chained - Allocate and chain SGLs in an sg table + * @table: The sg table header to use + * @nents: Number of entries in sg list + * @first_chunk: first SGL + * + * Description: + * Allocate and chain SGLs in an sg table. If @nents@ is larger than + * SG_CHUNK_SIZE a chained sg table will be setup. + * + **/ +int sg_alloc_table_chained(struct sg_table *table, int nents, + struct scatterlist *first_chunk) +{ + int ret; + + BUG_ON(!nents); + + if (first_chunk) { + if (nents <= SG_CHUNK_SIZE) { + table->nents = table->orig_nents = nents; + sg_init_table(table->sgl, nents); + return 0; + } + } + + ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE, + first_chunk, GFP_ATOMIC, sg_pool_alloc); + if (unlikely(ret)) + sg_free_table_chained(table, (bool)first_chunk); + return ret; +} +EXPORT_SYMBOL_GPL(sg_alloc_table_chained); + +static __init int sg_pool_init(void) +{ + int i; + + for (i = 0; i < SG_MEMPOOL_NR; i++) { + struct sg_pool *sgp = sg_pools + i; + int size = sgp->size * sizeof(struct scatterlist); + + sgp->slab = kmem_cache_create(sgp->name, size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!sgp->slab) { + printk(KERN_ERR "SG_POOL: can't init sg slab %s\n", + sgp->name); + goto cleanup_sdb; + } + + sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, + sgp->slab); + if (!sgp->pool) { + printk(KERN_ERR "SG_POOL: can't init sg mempool %s\n", + sgp->name); + goto cleanup_sdb; + } + } + + return 0; + +cleanup_sdb: + for (i = 0; i < SG_MEMPOOL_NR; i++) { + struct sg_pool *sgp = sg_pools + i; + if (sgp->pool) + mempool_destroy(sgp->pool); + if (sgp->slab) + kmem_cache_destroy(sgp->slab); + } + + return -ENOMEM; +} + +static __exit void sg_pool_exit(void) +{ + int i; + + for (i = 0; i < SG_MEMPOOL_NR; i++) { + struct sg_pool *sgp = sg_pools + i; + mempool_destroy(sgp->pool); + kmem_cache_destroy(sgp->slab); + } +} + +module_init(sg_pool_init); +module_exit(sg_pool_exit); -- cgit v1.2.3 From 464f664501816ef5fbbc00b8de96f4ae5a1c9325 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 14 Apr 2016 01:38:29 -0400 Subject: qed: Add infrastructure support for tunneling This patch adds various structure/APIs needed to configure/enable different tunnel [VXLAN/GRE/GENEVE] parameters on the adapter. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 46 ++++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 6 +- drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 2 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 51 ++++- .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 127 +++++++++++ drivers/net/ethernet/qlogic/qed/qed_l2.c | 31 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 31 +++ drivers/net/ethernet/qlogic/qed/qed_sp.h | 7 + drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 254 +++++++++++++++++++++ include/linux/qed/qed_eth_if.h | 10 + 11 files changed, 563 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 0f0d2d1d77e5..33e2ed60c18f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -74,6 +74,51 @@ struct qed_rt_data { bool *b_valid; }; +enum qed_tunn_mode { + QED_MODE_L2GENEVE_TUNN, + QED_MODE_IPGENEVE_TUNN, + QED_MODE_L2GRE_TUNN, + QED_MODE_IPGRE_TUNN, + QED_MODE_VXLAN_TUNN, +}; + +enum qed_tunn_clss { + QED_TUNN_CLSS_MAC_VLAN, + QED_TUNN_CLSS_MAC_VNI, + QED_TUNN_CLSS_INNER_MAC_VLAN, + QED_TUNN_CLSS_INNER_MAC_VNI, + MAX_QED_TUNN_CLSS, +}; + +struct qed_tunn_start_params { + unsigned long tunn_mode; + u16 vxlan_udp_port; + u16 geneve_udp_port; + u8 update_vxlan_udp_port; + u8 update_geneve_udp_port; + u8 tunn_clss_vxlan; + u8 tunn_clss_l2geneve; + u8 tunn_clss_ipgeneve; + u8 tunn_clss_l2gre; + u8 tunn_clss_ipgre; +}; + +struct qed_tunn_update_params { + unsigned long tunn_mode_update_mask; + unsigned long tunn_mode; + u16 vxlan_udp_port; + u16 geneve_udp_port; + u8 update_rx_pf_clss; + u8 update_tx_pf_clss; + u8 update_vxlan_udp_port; + u8 update_geneve_udp_port; + u8 tunn_clss_vxlan; + u8 tunn_clss_l2geneve; + u8 tunn_clss_ipgeneve; + u8 tunn_clss_l2gre; + u8 tunn_clss_ipgre; +}; + /* The PCI personality is not quite synonymous to protocol ID: * 1. All personalities need CORE connections * 2. The Ethernet personality may support also the RoCE protocol @@ -430,6 +475,7 @@ struct qed_dev { u8 num_hwfns; struct qed_hwfn hwfns[MAX_HWFNS_PER_DEVICE]; + unsigned long tunn_mode; u32 drv_type; struct qed_eth_stats *reset_stats; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b7d100f6bd6f..bdae5a55afa4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -558,6 +558,7 @@ static int qed_hw_init_port(struct qed_hwfn *p_hwfn, static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_tunn_start_params *p_tunn, int hw_mode, bool b_hw_start, enum qed_int_mode int_mode, @@ -625,7 +626,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, qed_int_igu_enable(p_hwfn, p_ptt, int_mode); /* send function start command */ - rc = qed_sp_pf_start(p_hwfn, p_hwfn->cdev->mf_mode); + rc = qed_sp_pf_start(p_hwfn, p_tunn, p_hwfn->cdev->mf_mode); if (rc) DP_NOTICE(p_hwfn, "Function start ramrod failed\n"); } @@ -672,6 +673,7 @@ static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn, } int qed_hw_init(struct qed_dev *cdev, + struct qed_tunn_start_params *p_tunn, bool b_hw_start, enum qed_int_mode int_mode, bool allow_npar_tx_switch, @@ -724,7 +726,7 @@ int qed_hw_init(struct qed_dev *cdev, /* Fall into */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: rc = qed_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt, - p_hwfn->hw_info.hw_mode, + p_tunn, p_hwfn->hw_info.hw_mode, b_hw_start, int_mode, allow_npar_tx_switch); break; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index d6c7ddf4f4d4..6aac3f855aa1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -62,6 +62,7 @@ void qed_resc_setup(struct qed_dev *cdev); * @brief qed_hw_init - * * @param cdev + * @param p_tunn * @param b_hw_start * @param int_mode - interrupt mode [msix, inta, etc.] to use. * @param allow_npar_tx_switch - npar tx switching to be used @@ -72,6 +73,7 @@ void qed_resc_setup(struct qed_dev *cdev); * @return int */ int qed_hw_init(struct qed_dev *cdev, + struct qed_tunn_start_params *p_tunn, bool b_hw_start, enum qed_int_mode int_mode, bool allow_npar_tx_switch, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index a368f5e71d95..15e02ab9be5a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -46,7 +46,7 @@ enum common_ramrod_cmd_id { COMMON_RAMROD_PF_STOP /* PF Function Stop Ramrod */, COMMON_RAMROD_RESERVED, COMMON_RAMROD_RESERVED2, - COMMON_RAMROD_RESERVED3, + COMMON_RAMROD_PF_UPDATE, COMMON_RAMROD_EMPTY, MAX_COMMON_RAMROD_CMD_ID }; @@ -626,6 +626,42 @@ struct pf_start_ramrod_data { u8 reserved0[4]; }; +/* tunnel configuration */ +struct pf_update_tunnel_config { + u8 update_rx_pf_clss; + u8 update_tx_pf_clss; + u8 set_vxlan_udp_port_flg; + u8 set_geneve_udp_port_flg; + u8 tx_enable_vxlan; + u8 tx_enable_l2geneve; + u8 tx_enable_ipgeneve; + u8 tx_enable_l2gre; + u8 tx_enable_ipgre; + u8 tunnel_clss_vxlan; + u8 tunnel_clss_l2geneve; + u8 tunnel_clss_ipgeneve; + u8 tunnel_clss_l2gre; + u8 tunnel_clss_ipgre; + __le16 vxlan_udp_port; + __le16 geneve_udp_port; + __le16 reserved[3]; +}; + +struct pf_update_ramrod_data { + u32 reserved[2]; + u32 reserved_1[6]; + struct pf_update_tunnel_config tunnel_config; +}; + +/* Tunnel classification scheme */ +enum tunnel_clss { + TUNNEL_CLSS_MAC_VLAN = 0, + TUNNEL_CLSS_MAC_VNI, + TUNNEL_CLSS_INNER_MAC_VLAN, + TUNNEL_CLSS_INNER_MAC_VNI, + MAX_TUNNEL_CLSS +}; + enum ports_mode { ENGX2_PORTX1 /* 2 engines x 1 port */, ENGX2_PORTX2 /* 2 engines x 2 ports */, @@ -1603,6 +1639,19 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, u16 start_pq, u16 num_pqs); +void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 dest_port); +void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool vxlan_enable); +void qed_set_gre_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool eth_gre_enable, + bool ip_gre_enable); +void qed_set_geneve_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 dest_port); +void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool eth_geneve_enable, + bool ip_geneve_enable); + /* Ystorm flow control mode. Use enum fw_flow_ctrl_mode */ #define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) #define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size) diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index f55ebdc3c832..1dd53248b984 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -788,3 +788,130 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, return true; } + +static void +qed_set_tunnel_type_enable_bit(unsigned long *var, int bit, bool enable) +{ + if (enable) + set_bit(bit, var); + else + clear_bit(bit, var); +} + +#define PRS_ETH_TUNN_FIC_FORMAT -188897008 + +void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 dest_port) +{ + qed_wr(p_hwfn, p_ptt, PRS_REG_VXLAN_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, NIG_REG_VXLAN_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, PBF_REG_VXLAN_PORT, dest_port); +} + +void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool vxlan_enable) +{ + unsigned long reg_val = 0; + u8 shift; + + reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN); + shift = PRS_REG_ENCAPSULATION_TYPE_EN_VXLAN_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, vxlan_enable); + + qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val); + + if (reg_val) + qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0, + PRS_ETH_TUNN_FIC_FORMAT); + + reg_val = qed_rd(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE); + shift = NIG_REG_ENC_TYPE_ENABLE_VXLAN_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, vxlan_enable); + + qed_wr(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE, reg_val); + + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_VXLAN_EN, + vxlan_enable ? 1 : 0); +} + +void qed_set_gre_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + bool eth_gre_enable, bool ip_gre_enable) +{ + unsigned long reg_val = 0; + u8 shift; + + reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN); + shift = PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, eth_gre_enable); + + shift = PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, ip_gre_enable); + qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val); + if (reg_val) + qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0, + PRS_ETH_TUNN_FIC_FORMAT); + + reg_val = qed_rd(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE); + shift = NIG_REG_ENC_TYPE_ENABLE_ETH_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, eth_gre_enable); + + shift = NIG_REG_ENC_TYPE_ENABLE_IP_OVER_GRE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, ip_gre_enable); + qed_wr(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE, reg_val); + + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_GRE_ETH_EN, + eth_gre_enable ? 1 : 0); + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_GRE_IP_EN, + ip_gre_enable ? 1 : 0); +} + +void qed_set_geneve_dest_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 dest_port) +{ + qed_wr(p_hwfn, p_ptt, PRS_REG_NGE_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_PORT, dest_port); + qed_wr(p_hwfn, p_ptt, PBF_REG_NGE_PORT, dest_port); +} + +void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool eth_geneve_enable, + bool ip_geneve_enable) +{ + unsigned long reg_val = 0; + u8 shift; + + reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN); + shift = PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GENEVE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, eth_geneve_enable); + + shift = PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GENEVE_ENABLE_SHIFT; + qed_set_tunnel_type_enable_bit(®_val, shift, ip_geneve_enable); + + qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val); + if (reg_val) + qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0, + PRS_ETH_TUNN_FIC_FORMAT); + + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_ETH_ENABLE, + eth_geneve_enable ? 1 : 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_IP_ENABLE, ip_geneve_enable ? 1 : 0); + + /* comp ver */ + reg_val = (ip_geneve_enable || eth_geneve_enable) ? 1 : 0; + qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_COMP_VER, reg_val); + qed_wr(p_hwfn, p_ptt, PBF_REG_NGE_COMP_VER, reg_val); + qed_wr(p_hwfn, p_ptt, PRS_REG_NGE_COMP_VER, reg_val); + + /* EDPM with geneve tunnel not supported in BB_B0 */ + if (QED_IS_BB_B0(p_hwfn->cdev)) + return; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_NGE_ETH_EN, + eth_geneve_enable ? 1 : 0); + qed_wr(p_hwfn, p_ptt, DORQ_REG_L2_EDPM_TUNNEL_NGE_IP_EN, + ip_geneve_enable ? 1 : 0); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 5005497ee23e..fb5f3b815340 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1884,6 +1884,36 @@ static int qed_stop_txq(struct qed_dev *cdev, return 0; } +static int qed_tunn_configure(struct qed_dev *cdev, + struct qed_tunn_params *tunn_params) +{ + struct qed_tunn_update_params tunn_info; + int i, rc; + + memset(&tunn_info, 0, sizeof(tunn_info)); + if (tunn_params->update_vxlan_port == 1) { + tunn_info.update_vxlan_udp_port = 1; + tunn_info.vxlan_udp_port = tunn_params->vxlan_port; + } + + if (tunn_params->update_geneve_port == 1) { + tunn_info.update_geneve_udp_port = 1; + tunn_info.geneve_udp_port = tunn_params->geneve_port; + } + + for_each_hwfn(cdev, i) { + struct qed_hwfn *hwfn = &cdev->hwfns[i]; + + rc = qed_sp_pf_update_tunn_cfg(hwfn, &tunn_info, + QED_SPQ_MODE_EBLOCK, NULL); + + if (rc) + return rc; + } + + return 0; +} + static int qed_configure_filter_rx_mode(struct qed_dev *cdev, enum qed_filter_rx_mode_type type) { @@ -2026,6 +2056,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .fastpath_stop = &qed_fastpath_stop, .eth_cqe_completion = &qed_fp_cqe_completion, .get_vport_stats = &qed_get_vport_stats, + .tunn_config = &qed_tunn_configure, }; const struct qed_eth_ops *qed_get_eth_ops(void) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c31d485f72d6..1916992ae8b1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -776,7 +776,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, /* Start the slowpath */ data = cdev->firmware->data; - rc = qed_hw_init(cdev, true, cdev->int_params.out.int_mode, + rc = qed_hw_init(cdev, NULL, true, cdev->int_params.out.int_mode, true, data); if (rc) goto err2; diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index c15b1622e636..55451a4dc587 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -427,4 +427,35 @@ 0x2aae60UL #define PGLUE_B_REG_PF_BAR1_SIZE \ 0x2aae64UL +#define PRS_REG_ENCAPSULATION_TYPE_EN 0x1f0730UL +#define PRS_REG_GRE_PROTOCOL 0x1f0734UL +#define PRS_REG_VXLAN_PORT 0x1f0738UL +#define PRS_REG_OUTPUT_FORMAT_4_0 0x1f099cUL +#define NIG_REG_ENC_TYPE_ENABLE 0x501058UL + +#define NIG_REG_ENC_TYPE_ENABLE_ETH_OVER_GRE_ENABLE (0x1 << 0) +#define NIG_REG_ENC_TYPE_ENABLE_ETH_OVER_GRE_ENABLE_SHIFT 0 +#define NIG_REG_ENC_TYPE_ENABLE_IP_OVER_GRE_ENABLE (0x1 << 1) +#define NIG_REG_ENC_TYPE_ENABLE_IP_OVER_GRE_ENABLE_SHIFT 1 +#define NIG_REG_ENC_TYPE_ENABLE_VXLAN_ENABLE (0x1 << 2) +#define NIG_REG_ENC_TYPE_ENABLE_VXLAN_ENABLE_SHIFT 2 + +#define NIG_REG_VXLAN_PORT 0x50105cUL +#define PBF_REG_VXLAN_PORT 0xd80518UL +#define PBF_REG_NGE_PORT 0xd8051cUL +#define PRS_REG_NGE_PORT 0x1f086cUL +#define NIG_REG_NGE_PORT 0x508b38UL + +#define DORQ_REG_L2_EDPM_TUNNEL_GRE_ETH_EN 0x10090cUL +#define DORQ_REG_L2_EDPM_TUNNEL_GRE_IP_EN 0x100910UL +#define DORQ_REG_L2_EDPM_TUNNEL_VXLAN_EN 0x100914UL +#define DORQ_REG_L2_EDPM_TUNNEL_NGE_IP_EN 0x10092cUL +#define DORQ_REG_L2_EDPM_TUNNEL_NGE_ETH_EN 0x100930UL + +#define NIG_REG_NGE_IP_ENABLE 0x508b28UL +#define NIG_REG_NGE_ETH_ENABLE 0x508b2cUL +#define NIG_REG_NGE_COMP_VER 0x508b30UL +#define PBF_REG_NGE_COMP_VER 0xd80524UL +#define PRS_REG_NGE_COMP_VER 0x1f0878UL + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index d39f914b66ee..4b91cb32f317 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -52,6 +52,7 @@ int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn, union ramrod_data { struct pf_start_ramrod_data pf_start; + struct pf_update_ramrod_data pf_update; struct rx_queue_start_ramrod_data rx_queue_start; struct rx_queue_update_ramrod_data rx_queue_update; struct rx_queue_stop_ramrod_data rx_queue_stop; @@ -338,12 +339,14 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, * to the internal RAM of the UStorm by the Function Start Ramrod. * * @param p_hwfn + * @param p_tunn * @param mode * * @return int */ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, + struct qed_tunn_start_params *p_tunn, enum qed_mf_mode mode); /** @@ -362,4 +365,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, int qed_sp_pf_stop(struct qed_hwfn *p_hwfn); +int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_tunn, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 1c06c37d4c3d..306da7000ddc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -87,7 +87,217 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, return 0; } +static enum tunnel_clss qed_tunn_get_clss_type(u8 type) +{ + switch (type) { + case QED_TUNN_CLSS_MAC_VLAN: + return TUNNEL_CLSS_MAC_VLAN; + case QED_TUNN_CLSS_MAC_VNI: + return TUNNEL_CLSS_MAC_VNI; + case QED_TUNN_CLSS_INNER_MAC_VLAN: + return TUNNEL_CLSS_INNER_MAC_VLAN; + case QED_TUNN_CLSS_INNER_MAC_VNI: + return TUNNEL_CLSS_INNER_MAC_VNI; + default: + return TUNNEL_CLSS_MAC_VLAN; + } +} + +static void +qed_tunn_set_pf_fix_tunn_mode(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_src, + struct pf_update_tunnel_config *p_tunn_cfg) +{ + unsigned long cached_tunn_mode = p_hwfn->cdev->tunn_mode; + unsigned long update_mask = p_src->tunn_mode_update_mask; + unsigned long tunn_mode = p_src->tunn_mode; + unsigned long new_tunn_mode = 0; + + if (test_bit(QED_MODE_L2GRE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_L2GRE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_L2GRE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_L2GRE_TUNN, &new_tunn_mode); + } + + if (test_bit(QED_MODE_IPGRE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_IPGRE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_IPGRE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_IPGRE_TUNN, &new_tunn_mode); + } + + if (test_bit(QED_MODE_VXLAN_TUNN, &update_mask)) { + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + __set_bit(QED_MODE_VXLAN_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_VXLAN_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_VXLAN_TUNN, &new_tunn_mode); + } + + if (p_src->update_geneve_udp_port) { + p_tunn_cfg->set_geneve_udp_port_flg = 1; + p_tunn_cfg->geneve_udp_port = + cpu_to_le16(p_src->geneve_udp_port); + } + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_L2GENEVE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_L2GENEVE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_L2GENEVE_TUNN, &new_tunn_mode); + } + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &update_mask)) { + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + __set_bit(QED_MODE_IPGENEVE_TUNN, &new_tunn_mode); + } else { + if (test_bit(QED_MODE_IPGENEVE_TUNN, &cached_tunn_mode)) + __set_bit(QED_MODE_IPGENEVE_TUNN, &new_tunn_mode); + } + + p_src->tunn_mode = new_tunn_mode; +} + +static void +qed_tunn_set_pf_update_params(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_src, + struct pf_update_tunnel_config *p_tunn_cfg) +{ + unsigned long tunn_mode = p_src->tunn_mode; + enum tunnel_clss type; + + qed_tunn_set_pf_fix_tunn_mode(p_hwfn, p_src, p_tunn_cfg); + p_tunn_cfg->update_rx_pf_clss = p_src->update_rx_pf_clss; + p_tunn_cfg->update_tx_pf_clss = p_src->update_tx_pf_clss; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_vxlan); + p_tunn_cfg->tunnel_clss_vxlan = type; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2gre); + p_tunn_cfg->tunnel_clss_l2gre = type; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgre); + p_tunn_cfg->tunnel_clss_ipgre = type; + + if (p_src->update_vxlan_udp_port) { + p_tunn_cfg->set_vxlan_udp_port_flg = 1; + p_tunn_cfg->vxlan_udp_port = cpu_to_le16(p_src->vxlan_udp_port); + } + + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2gre = 1; + + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgre = 1; + + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_vxlan = 1; + + if (p_src->update_geneve_udp_port) { + p_tunn_cfg->set_geneve_udp_port_flg = 1; + p_tunn_cfg->geneve_udp_port = + cpu_to_le16(p_src->geneve_udp_port); + } + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2geneve = 1; + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgeneve = 1; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2geneve); + p_tunn_cfg->tunnel_clss_l2geneve = type; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgeneve); + p_tunn_cfg->tunnel_clss_ipgeneve = type; +} + +static void qed_set_hw_tunn_mode(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + unsigned long tunn_mode) +{ + u8 l2gre_enable = 0, ipgre_enable = 0, vxlan_enable = 0; + u8 l2geneve_enable = 0, ipgeneve_enable = 0; + + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + l2gre_enable = 1; + + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + ipgre_enable = 1; + + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + vxlan_enable = 1; + + qed_set_gre_enable(p_hwfn, p_ptt, l2gre_enable, ipgre_enable); + qed_set_vxlan_enable(p_hwfn, p_ptt, vxlan_enable); + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + l2geneve_enable = 1; + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + ipgeneve_enable = 1; + + qed_set_geneve_enable(p_hwfn, p_ptt, l2geneve_enable, + ipgeneve_enable); +} + +static void +qed_tunn_set_pf_start_params(struct qed_hwfn *p_hwfn, + struct qed_tunn_start_params *p_src, + struct pf_start_tunnel_config *p_tunn_cfg) +{ + unsigned long tunn_mode; + enum tunnel_clss type; + + if (!p_src) + return; + + tunn_mode = p_src->tunn_mode; + type = qed_tunn_get_clss_type(p_src->tunn_clss_vxlan); + p_tunn_cfg->tunnel_clss_vxlan = type; + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2gre); + p_tunn_cfg->tunnel_clss_l2gre = type; + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgre); + p_tunn_cfg->tunnel_clss_ipgre = type; + + if (p_src->update_vxlan_udp_port) { + p_tunn_cfg->set_vxlan_udp_port_flg = 1; + p_tunn_cfg->vxlan_udp_port = cpu_to_le16(p_src->vxlan_udp_port); + } + + if (test_bit(QED_MODE_L2GRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2gre = 1; + + if (test_bit(QED_MODE_IPGRE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgre = 1; + + if (test_bit(QED_MODE_VXLAN_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_vxlan = 1; + + if (p_src->update_geneve_udp_port) { + p_tunn_cfg->set_geneve_udp_port_flg = 1; + p_tunn_cfg->geneve_udp_port = + cpu_to_le16(p_src->geneve_udp_port); + } + + if (test_bit(QED_MODE_L2GENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_l2geneve = 1; + + if (test_bit(QED_MODE_IPGENEVE_TUNN, &tunn_mode)) + p_tunn_cfg->tx_enable_ipgeneve = 1; + + type = qed_tunn_get_clss_type(p_src->tunn_clss_l2geneve); + p_tunn_cfg->tunnel_clss_l2geneve = type; + type = qed_tunn_get_clss_type(p_src->tunn_clss_ipgeneve); + p_tunn_cfg->tunnel_clss_ipgeneve = type; +} + int qed_sp_pf_start(struct qed_hwfn *p_hwfn, + struct qed_tunn_start_params *p_tunn, enum qed_mf_mode mode) { struct pf_start_ramrod_data *p_ramrod = NULL; @@ -143,6 +353,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr, p_hwfn->p_consq->chain.pbl.p_phys_table); + qed_tunn_set_pf_start_params(p_hwfn, NULL, NULL); p_hwfn->hw_info.personality = PERSONALITY_ETH; DP_VERBOSE(p_hwfn, QED_MSG_SPQ, @@ -153,6 +364,49 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } +/* Set pf update ramrod command params */ +int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, + struct qed_tunn_update_params *p_tunn, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = comp_mode; + init_data.p_comp_data = p_comp_data; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + COMMON_RAMROD_PF_UPDATE, PROTOCOLID_COMMON, + &init_data); + if (rc) + return rc; + + qed_tunn_set_pf_update_params(p_hwfn, p_tunn, + &p_ent->ramrod.pf_update.tunnel_config); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + return rc; + + if (p_tunn->update_vxlan_udp_port) + qed_set_vxlan_dest_port(p_hwfn, p_hwfn->p_main_ptt, + p_tunn->vxlan_udp_port); + if (p_tunn->update_geneve_udp_port) + qed_set_geneve_dest_port(p_hwfn, p_hwfn->p_main_ptt, + p_tunn->geneve_udp_port); + + qed_set_hw_tunn_mode(p_hwfn, p_hwfn->p_main_ptt, p_tunn->tunn_mode); + p_hwfn->cdev->tunn_mode = p_tunn->tunn_mode; + + return rc; +} + int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 795c9902e02f..3a4c806be156 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -112,6 +112,13 @@ struct qed_queue_start_common_params { u16 sb_idx; }; +struct qed_tunn_params { + u16 vxlan_port; + u8 update_vxlan_port; + u16 geneve_port; + u8 update_geneve_port; +}; + struct qed_eth_cb_ops { struct qed_common_cb_ops common; }; @@ -166,6 +173,9 @@ struct qed_eth_ops { void (*get_vport_stats)(struct qed_dev *cdev, struct qed_eth_stats *stats); + + int (*tunn_config)(struct qed_dev *cdev, + struct qed_tunn_params *params); }; const struct qed_eth_ops *qed_get_eth_ops(void); -- cgit v1.2.3 From e1c9c62b9a3a761b56359a7437215ae2e9253821 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 11 Apr 2016 23:10:21 +0300 Subject: net/mlx5: Fix mlx5 ifc cmd_hca_cap bad offsets All reserved fields after early_vf_enable are off by 1, since early_vf_enable was not explicitly declared as array of size 1. Reserved field before cqe_zip had a wrong size, it should be 0x80 + 0x3f. Fixes: b0844444590e ("net/mlx5_core: Introduce access function to read internal timer ") Fixes: b4ff3a36d3e4 ("net/mlx5: Use offset based reserved field names in the IFC header file") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 107 ++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c15b8a864937..c300e7491d80 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -750,21 +750,21 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; - u8 early_vf_enable; - u8 reserved_at_1a8[0x2]; + u8 early_vf_enable[0x1]; + u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; u8 reserved_at_1af[0x6]; u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_at_1bf[0x3]; + u8 reserved_at_1c0[0x3]; u8 log_max_msg[0x5]; - u8 reserved_at_1c7[0x4]; + u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1cf[0x6]; + u8 reserved_at_1d0[0x6]; u8 rol_s[0x1]; u8 rol_g[0x1]; - u8 reserved_at_1d7[0x1]; + u8 reserved_at_1d8[0x1]; u8 wol_s[0x1]; u8 wol_g[0x1]; u8 wol_a[0x1]; @@ -774,47 +774,47 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 wol_p[0x1]; u8 stat_rate_support[0x10]; - u8 reserved_at_1ef[0xc]; + u8 reserved_at_1f0[0xc]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; u8 reserved_at_200[0x3]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_204[0xa]; + u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_at_212[0x1]; + u8 reserved_at_213[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_at_215[0x1]; + u8 reserved_at_216[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21a[0x1]; + u8 reserved_at_21b[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 reserved_at_21e[0x1]; + u8 reserved_at_21f[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_at_222[0x3]; + u8 reserved_at_223[0x3]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_at_228[0x1]; + u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; u8 reserved_at_22a[0x1]; u8 cd[0x1]; - u8 reserved_at_22c[0x1]; + u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; u8 reserved_at_22f[0x1]; u8 imaicl[0x1]; - u8 reserved_at_231[0x4]; + u8 reserved_at_232[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; @@ -824,98 +824,101 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_at_23f[0xa]; + u8 reserved_at_240[0xa]; u8 uar_sz[0x6]; - u8 reserved_at_24f[0x8]; + u8 reserved_at_250[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_at_260[0x1]; + u8 reserved_at_261[0x1]; u8 pad_tx_eth_packet[0x1]; - u8 reserved_at_262[0x8]; + u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_at_26f[0x10]; + u8 reserved_at_270[0x10]; - u8 reserved_at_27f[0x10]; + u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_29f[0x10]; + u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2bf[0x10]; + u8 reserved_at_2c0[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_at_2df[0x7]; + u8 reserved_at_2e0[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_at_2ff[0x18]; + u8 reserved_at_300[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_at_31f[0x3]; + u8 reserved_at_320[0x3]; u8 log_max_transport_domain[0x5]; - u8 reserved_at_327[0x3]; + u8 reserved_at_328[0x3]; u8 log_max_pd[0x5]; - u8 reserved_at_32f[0xb]; + u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_33f[0x20]; + u8 reserved_at_340[0x20]; - u8 reserved_at_35f[0x3]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; - u8 reserved_at_367[0x3]; + u8 reserved_at_368[0x3]; u8 log_max_sq[0x5]; - u8 reserved_at_36f[0x3]; + u8 reserved_at_370[0x3]; u8 log_max_tir[0x5]; - u8 reserved_at_377[0x3]; + u8 reserved_at_378[0x3]; u8 log_max_tis[0x5]; u8 basic_cyclic_rcv_wqe[0x1]; - u8 reserved_at_380[0x2]; + u8 reserved_at_381[0x2]; u8 log_max_rmp[0x5]; - u8 reserved_at_387[0x3]; + u8 reserved_at_388[0x3]; u8 log_max_rqt[0x5]; - u8 reserved_at_38f[0x3]; + u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_397[0x3]; + u8 reserved_at_398[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_at_39f[0x3]; + u8 reserved_at_3a0[0x3]; u8 log_max_stride_sz_rq[0x5]; - u8 reserved_at_3a7[0x3]; + u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; - u8 reserved_at_3af[0x3]; + u8 reserved_at_3b0[0x3]; u8 log_max_stride_sz_sq[0x5]; - u8 reserved_at_3b7[0x3]; + u8 reserved_at_3b8[0x3]; u8 log_min_stride_sz_sq[0x5]; - u8 reserved_at_3bf[0x1b]; + u8 reserved_at_3c0[0x1b]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_at_3e0[0xa]; + u8 reserved_at_3e1[0xa]; u8 log_max_vlan_list[0x5]; - u8 reserved_at_3ef[0x3]; + u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_at_3f7[0x3]; + u8 reserved_at_3f8[0x3]; u8 log_max_current_uc_list[0x5]; - u8 reserved_at_3ff[0x80]; + u8 reserved_at_400[0x80]; - u8 reserved_at_47f[0x3]; + u8 reserved_at_480[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_at_487[0x8]; + u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_at_49f[0x20]; + u8 reserved_at_4a0[0x20]; u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_at_4ff[0x5f]; + + u8 reserved_at_500[0x80]; + + u8 reserved_at_580[0x3f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_at_57f[0x220]; + u8 reserved_at_5e0[0x220]; }; enum mlx5_flow_destination_type { -- cgit v1.2.3 From 7d5e14237a551a5de3d287f2e8db2d044ee81a1a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 11 Apr 2016 23:10:22 +0300 Subject: net/mlx5: Update mlx5_ifc hardware features Adding the needed mlx5_ifc hardware bits and structs for the following feature: * Add vport to steering commands for SRIOV ACL support * Add mlcr, pcmr and mcia registers for dump module EEPROM * Add support for FCS, baeacon led and disable_link bits to hca caps * Add CQE period mode bit in CQ context for CQE based CQ moderation support * Add umr SQ bit for fragmented memory registration * Add needed bits and caps for Striding RQ support Signed-off-by: Saeed Mahameed Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 146 +++++++++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c300e7491d80..4ce4ea422a10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -513,7 +513,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 max_lso_cap[0x5]; u8 reserved_at_10[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_at_18[0x3]; + u8 reg_umr_sq[0x1]; + u8 scatter_fcs[0x1]; + u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; @@ -648,7 +650,7 @@ struct mlx5_ifc_vector_calc_cap_bits { enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, - MLX5_WQ_TYPE_STRQ = 0x2, + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, }; enum { @@ -753,7 +755,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 early_vf_enable[0x1]; u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_at_1af[0x6]; + u8 reserved_at_1af[0x2]; + u8 ports_check[0x1]; + u8 reserved_at_1b2[0x1]; + u8 disable_link_up[0x1]; + u8 beacon_led[0x1]; u8 port_type[0x2]; u8 num_ports[0x8]; @@ -778,7 +784,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0x3]; + u8 striding_rq[0x1]; + u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; @@ -807,12 +814,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 block_lb_mc[0x1]; u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; - u8 reserved_at_22a[0x1]; + u8 cq_period_start_from_cqe[0x1]; u8 cd[0x1]; u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; - u8 reserved_at_22f[0x1]; + u8 umr_ptr_rlky[0x1]; u8 imaicl[0x1]; u8 reserved_at_232[0x4]; u8 qkv[0x1]; @@ -913,10 +920,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_500[0x80]; u8 reserved_at_580[0x3f]; - u8 cqe_zip[0x1]; + u8 cqe_compression[0x1]; - u8 cqe_zip_timeout[0x10]; - u8 cqe_zip_max_num[0x10]; + u8 cqe_compression_timeout[0x10]; + u8 cqe_compression_max_num[0x10]; u8 reserved_at_5e0[0x220]; }; @@ -1000,7 +1007,13 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x4e0]; + u8 reserved_at_120[0x15]; + u8 log_wqe_num_of_strides[0x3]; + u8 two_byte_shift_en[0x1]; + u8 reserved_at_139[0x4]; + u8 log_wqe_stride_size[0x3]; + + u8 reserved_at_140[0x4c0]; struct mlx5_ifc_cmd_pas_bits pas[0]; }; @@ -2199,7 +2212,8 @@ struct mlx5_ifc_sqc_bits { u8 flush_in_error_en[0x1]; u8 reserved_at_4[0x4]; u8 state[0x4]; - u8 reserved_at_c[0x14]; + u8 reg_umr[0x1]; + u8 reserved_at_d[0x13]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -2247,7 +2261,8 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_1[0x1]; + u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; u8 state[0x4]; @@ -2604,6 +2619,11 @@ enum { MLX5_CQC_ST_FIRED = 0xa, }; +enum { + MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, + MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, +}; + struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x4]; @@ -2612,8 +2632,8 @@ struct mlx5_ifc_cqc_bits { u8 reserved_at_c[0x1]; u8 scqe_break_moderation_en[0x1]; u8 oi[0x1]; - u8 reserved_at_f[0x2]; - u8 cqe_zip_en[0x1]; + u8 cq_period_mode[0x2]; + u8 cqe_comp_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; u8 reserved_at_18[0x8]; @@ -2987,7 +3007,11 @@ struct mlx5_ifc_set_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5181,7 +5205,11 @@ struct mlx5_ifc_destroy_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5208,7 +5236,11 @@ struct mlx5_ifc_destroy_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5349,7 +5381,11 @@ struct mlx5_ifc_delete_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5795,7 +5831,11 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5839,7 +5879,11 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -6372,6 +6416,17 @@ struct mlx5_ifc_ptys_reg_bits { u8 reserved_at_1a0[0x60]; }; +struct mlx5_ifc_mlcr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x20]; + + u8 beacon_duration[0x10]; + u8 reserved_at_40[0x10]; + + u8 beacon_remain[0x10]; +}; + struct mlx5_ifc_ptas_reg_bits { u8 reserved_at_0[0x20]; @@ -6781,6 +6836,16 @@ struct mlx5_ifc_pamp_reg_bits { u8 index_data[18][0x10]; }; +struct mlx5_ifc_pcmr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x2e]; + u8 fcs_cap[0x1]; + u8 reserved_at_3f[0x1f]; + u8 fcs_chk[0x1]; + u8 reserved_at_5f[0x1]; +}; + struct mlx5_ifc_lane_2_module_mapping_bits { u8 reserved_at_0[0x6]; u8 rx_lane[0x2]; @@ -7117,6 +7182,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pspa_reg_bits pspa_reg; struct mlx5_ifc_ptas_reg_bits ptas_reg; struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_mlcr_reg_bits mlcr_reg; struct mlx5_ifc_pude_reg_bits pude_reg; struct mlx5_ifc_pvlc_reg_bits pvlc_reg; struct mlx5_ifc_slrg_reg_bits slrg_reg; @@ -7150,7 +7216,11 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -7181,7 +7251,9 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x20]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; u8 reserved_at_60[0x10]; u8 modify_field_select[0x10]; @@ -7247,4 +7319,34 @@ struct mlx5_ifc_qtct_reg_bits { u8 tclass[0x3]; }; +struct mlx5_ifc_mcia_reg_bits { + u8 l[0x1]; + u8 reserved_at_1[0x7]; + u8 module[0x8]; + u8 reserved_at_10[0x8]; + u8 status[0x8]; + + u8 i2c_device_address[0x8]; + u8 page_number[0x8]; + u8 device_address[0x10]; + + u8 reserved_at_40[0x10]; + u8 size[0x10]; + + u8 reserved_at_60[0x20]; + + u8 dword_0[0x20]; + u8 dword_1[0x20]; + u8 dword_2[0x20]; + u8 dword_3[0x20]; + u8 dword_4[0x20]; + u8 dword_5[0x20]; + u8 dword_6[0x20]; + u8 dword_7[0x20]; + u8 dword_8[0x20]; + u8 dword_9[0x20]; + u8 dword_10[0x20]; + u8 dword_11[0x20]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From 52c52a61a39fb319c14a582f8631619e5d5f55bf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 14 Apr 2016 15:35:30 +0800 Subject: sctp: add sctp_info dump api for sctp_diag sctp_diag will dump some important details of sctp's assoc or ep, we use sctp_info to describe them, sctp_get_sctp_info to get them, and export it to sctp_diag.ko. v2->v3: - we will not use list_for_each_safe in sctp_get_sctp_info, cause all the callers of it will use lock_sock. - fix the holes in struct sctp_info with __reserved* field. because sctp_diag is a new feature, and sctp_info is just for now, it may be changed in the future. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 67 ++++++++++++++++++++++++++++++++++++++ include/net/sctp/sctp.h | 3 ++ net/sctp/socket.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a9414fd49dc6..dacb5e711994 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -705,4 +705,71 @@ typedef struct sctp_auth_chunk { sctp_authhdr_t auth_hdr; } __packed sctp_auth_chunk_t; +struct sctp_info { + __u32 sctpi_tag; + __u32 sctpi_state; + __u32 sctpi_rwnd; + __u16 sctpi_unackdata; + __u16 sctpi_penddata; + __u16 sctpi_instrms; + __u16 sctpi_outstrms; + __u32 sctpi_fragmentation_point; + __u32 sctpi_inqueue; + __u32 sctpi_outqueue; + __u32 sctpi_overall_error; + __u32 sctpi_max_burst; + __u32 sctpi_maxseg; + __u32 sctpi_peer_rwnd; + __u32 sctpi_peer_tag; + __u8 sctpi_peer_capable; + __u8 sctpi_peer_sack; + __u16 __reserved1; + + /* assoc status info */ + __u64 sctpi_isacks; + __u64 sctpi_osacks; + __u64 sctpi_opackets; + __u64 sctpi_ipackets; + __u64 sctpi_rtxchunks; + __u64 sctpi_outofseqtsns; + __u64 sctpi_idupchunks; + __u64 sctpi_gapcnt; + __u64 sctpi_ouodchunks; + __u64 sctpi_iuodchunks; + __u64 sctpi_oodchunks; + __u64 sctpi_iodchunks; + __u64 sctpi_octrlchunks; + __u64 sctpi_ictrlchunks; + + /* primary transport info */ + struct sockaddr_storage sctpi_p_address; + __s32 sctpi_p_state; + __u32 sctpi_p_cwnd; + __u32 sctpi_p_srtt; + __u32 sctpi_p_rto; + __u32 sctpi_p_hbinterval; + __u32 sctpi_p_pathmaxrxt; + __u32 sctpi_p_sackdelay; + __u32 sctpi_p_sackfreq; + __u32 sctpi_p_ssthresh; + __u32 sctpi_p_partial_bytes_acked; + __u32 sctpi_p_flight_size; + __u16 sctpi_p_error; + __u16 __reserved2; + + /* sctp sock info */ + __u32 sctpi_s_autoclose; + __u32 sctpi_s_adaptation_ind; + __u32 sctpi_s_pd_point; + __u8 sctpi_s_nodelay; + __u8 sctpi_s_disable_fragments; + __u8 sctpi_s_v4mapped; + __u8 sctpi_s_frag_interleave; +}; + +struct sctp_infox { + struct sctp_info *sctpinfo; + struct sctp_association *asoc; +}; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 978d5f67d5a7..268b10058ef5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -116,6 +116,9 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info); + /* * sctp/primitive.c */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bf265a4bba6e..cd0fb3bb493c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4202,6 +4202,92 @@ static void sctp_shutdown(struct sock *sk, int how) } } +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info) +{ + struct sctp_transport *prim; + struct list_head *pos; + int mask; + + memset(info, 0, sizeof(*info)); + if (!asoc) { + struct sctp_sock *sp = sctp_sk(sk); + + info->sctpi_s_autoclose = sp->autoclose; + info->sctpi_s_adaptation_ind = sp->adaptation_ind; + info->sctpi_s_pd_point = sp->pd_point; + info->sctpi_s_nodelay = sp->nodelay; + info->sctpi_s_disable_fragments = sp->disable_fragments; + info->sctpi_s_v4mapped = sp->v4mapped; + info->sctpi_s_frag_interleave = sp->frag_interleave; + + return 0; + } + + info->sctpi_tag = asoc->c.my_vtag; + info->sctpi_state = asoc->state; + info->sctpi_rwnd = asoc->a_rwnd; + info->sctpi_unackdata = asoc->unack_data; + info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); + info->sctpi_instrms = asoc->c.sinit_max_instreams; + info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + list_for_each(pos, &asoc->base.inqueue.in_chunk_list) + info->sctpi_inqueue++; + list_for_each(pos, &asoc->outqueue.out_chunk_list) + info->sctpi_outqueue++; + info->sctpi_overall_error = asoc->overall_error_count; + info->sctpi_max_burst = asoc->max_burst; + info->sctpi_maxseg = asoc->frag_point; + info->sctpi_peer_rwnd = asoc->peer.rwnd; + info->sctpi_peer_tag = asoc->c.peer_vtag; + + mask = asoc->peer.ecn_capable << 1; + mask = (mask | asoc->peer.ipv4_address) << 1; + mask = (mask | asoc->peer.ipv6_address) << 1; + mask = (mask | asoc->peer.hostname_address) << 1; + mask = (mask | asoc->peer.asconf_capable) << 1; + mask = (mask | asoc->peer.prsctp_capable) << 1; + mask = (mask | asoc->peer.auth_capable); + info->sctpi_peer_capable = mask; + mask = asoc->peer.sack_needed << 1; + mask = (mask | asoc->peer.sack_generation) << 1; + mask = (mask | asoc->peer.zero_window_announced); + info->sctpi_peer_sack = mask; + + info->sctpi_isacks = asoc->stats.isacks; + info->sctpi_osacks = asoc->stats.osacks; + info->sctpi_opackets = asoc->stats.opackets; + info->sctpi_ipackets = asoc->stats.ipackets; + info->sctpi_rtxchunks = asoc->stats.rtxchunks; + info->sctpi_outofseqtsns = asoc->stats.outofseqtsns; + info->sctpi_idupchunks = asoc->stats.idupchunks; + info->sctpi_gapcnt = asoc->stats.gapcnt; + info->sctpi_ouodchunks = asoc->stats.ouodchunks; + info->sctpi_iuodchunks = asoc->stats.iuodchunks; + info->sctpi_oodchunks = asoc->stats.oodchunks; + info->sctpi_iodchunks = asoc->stats.iodchunks; + info->sctpi_octrlchunks = asoc->stats.octrlchunks; + info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; + + prim = asoc->peer.primary_path; + memcpy(&info->sctpi_p_address, &prim->ipaddr, + sizeof(struct sockaddr_storage)); + info->sctpi_p_state = prim->state; + info->sctpi_p_cwnd = prim->cwnd; + info->sctpi_p_srtt = prim->srtt; + info->sctpi_p_rto = jiffies_to_msecs(prim->rto); + info->sctpi_p_hbinterval = prim->hbinterval; + info->sctpi_p_pathmaxrxt = prim->pathmaxrxt; + info->sctpi_p_sackdelay = jiffies_to_msecs(prim->sackdelay); + info->sctpi_p_ssthresh = prim->ssthresh; + info->sctpi_p_partial_bytes_acked = prim->partial_bytes_acked; + info->sctpi_p_flight_size = prim->flight_size; + info->sctpi_p_error = prim->error_count; + + return 0; +} +EXPORT_SYMBOL_GPL(sctp_get_sctp_info); + /* 7.2.1 Association Status (SCTP_STATUS) * Applications can retrieve current status information about an -- cgit v1.2.3 From c5cc2a0bc930f1ae00b198aeb752acc3bdd4d5a7 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 16 Mar 2016 21:54:55 +0200 Subject: clk: ti: dpll: add support for specifying max rate for DPLLs DPLLs typically have a maximum rate they can support, and this varies from DPLL to DPLL. Add support of the maximum rate value to the DPLL data struct, and also add check for this in the DPLL round_rate function. Signed-off-by: Tero Kristo Reviewed-by: Nishanth Menon Cc: Tomi Valkeinen Cc: Lokesh Vutla Signed-off-by: Stephen Boyd --- drivers/clk/ti/clkt_dpll.c | 3 +++ include/linux/clk/ti.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c index 032c658a5f5e..b919fdfe8256 100644 --- a/drivers/clk/ti/clkt_dpll.c +++ b/drivers/clk/ti/clkt_dpll.c @@ -301,6 +301,9 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, dd = clk->dpll_data; + if (dd->max_rate && target_rate > dd->max_rate) + target_rate = dd->max_rate; + ref_rate = clk_hw_get_rate(dd->clk_ref); clk_name = clk_hw_get_name(hw); pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n", diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index dc5164a6df29..6110fe09ed18 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -37,6 +37,7 @@ * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() * @min_divider: minimum valid non-bypass divider value (actual) * @max_divider: maximum valid non-bypass divider value (actual) + * @max_rate: maximum clock rate for the DPLL * @modes: possible values of @enable_mask * @autoidle_reg: register containing the DPLL autoidle mode bitfield * @idlest_reg: register containing the DPLL idle status bitfield @@ -81,6 +82,7 @@ struct dpll_data { u8 last_rounded_n; u8 min_divider; u16 max_divider; + unsigned long max_rate; u8 modes; void __iomem *autoidle_reg; void __iomem *idlest_reg; -- cgit v1.2.3 From af8a41271b56f6d79cb4d7c7f3ca688a2d97a801 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 15 Apr 2016 16:59:38 +0200 Subject: iio:adis: Add support for manual self-test flag clear Some variants of the devices from the ADIS family don't auto-clear the self-test bit after the self-test has completed. Instead we have to manually clear. Add support for this to the ADIS library. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 7 ++++++- include/linux/iio/imu/adis.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index 911255d41c1a..ad6f91d06185 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -324,7 +324,12 @@ static int adis_self_test(struct adis *adis) msleep(adis->data->startup_delay); - return adis_check_status(adis); + ret = adis_check_status(adis); + + if (adis->data->self_test_no_autoclear) + adis_write_reg_16(adis, adis->data->msc_ctrl_reg, 0x00); + + return ret; } /** diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index fa2d01ef8f55..360da7d18a3d 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -41,6 +41,7 @@ struct adis_data { unsigned int diag_stat_reg; unsigned int self_test_mask; + bool self_test_no_autoclear; unsigned int startup_delay; const char * const *status_error_msgs; -- cgit v1.2.3 From 756ca874417695f77941948a77e9b8562635cc0a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 14 Apr 2016 17:04:34 -0400 Subject: netdev_features: Add NETIF_F_TSO_MANGLEID to NETIF_F_ALL_TSO I realized that when I added NETIF_F_TSO_MANGLEID as a TSO type I forgot to add it to NETIF_F_ALL_TSO. This patch corrects that so the flag will be included correctly. The result should be minor as it was only used by a few drivers and in a few specific cases such as when NETIF_F_SG was not supported on a device so the TSO flags were cleared. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9fc79df0e561..15eb0b12fff9 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -164,7 +164,8 @@ enum { #define NETIF_F_CSUM_MASK (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM) -#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ + NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) #define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ NETIF_F_FSO) -- cgit v1.2.3 From 94be46b9e5e2954b6d5962750f8aae8b5f099baa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 18 Apr 2016 15:33:10 +0200 Subject: regulator: s2mps11: Set default ramp delay for S2MPS11 LDOs Driver did not provide default value for ramp delay for LDOs which lead to warning in dmesg, e.g. on Odroid XU4: [ 1.486076] vdd_ldo9: ramp_delay not set [ 1.506875] vddq_mmc2: ramp_delay not set [ 1.523766] vdd_ldo15: ramp_delay not set [ 1.544702] vdd_sd: ramp_delay not set The datasheet for all the S2MPS1x family is inconsistent here and does not specify unambiguously the value of ramp delay for LDO. It mentions 30 mV/us in one timing diagram but then omits it completely in LDO regulator characteristics table (it is specified for bucks). However the vendor kernels for Galaxy S5 and Odroid XU3 use values of 12 mV/us or 24 mV/us. Without the ramp delay value the consumers do not wait for voltage settle after changing it. Although the proper value of ramp delay for LDOs is unknown, it seems safer to use at least some value from reference kernel than to leave it unset. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- drivers/regulator/s2mps11.c | 1 + include/linux/mfd/samsung/core.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index 46e5a2922c4d..47c7de8f39e9 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -267,6 +267,7 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ + .ramp_delay = RAMP_DELAY_12_MVUS, \ .min_uV = MIN_800_MV, \ .uV_step = step, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 6bc4bcd488ac..5a23dd4df432 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -30,6 +30,9 @@ #define MIN_600_MV 600000 #define MIN_500_MV 500000 +/* Ramp delay in uV/us */ +#define RAMP_DELAY_12_MVUS 12000 + /* Macros to represent steps for LDO/BUCK */ #define STEP_50_MV 50000 #define STEP_25_MV 25000 -- cgit v1.2.3 From 6d62b4d5fac620ee0ca65dc6d99b0306d96bc541 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Fri, 15 Apr 2016 00:34:59 +0200 Subject: net: ethtool: export conversion function between u32 and link mode The function convert_legacy_u32_to_link_mode and convert_link_mode_to_legacy_u32 may be used outside of ethtool.c. We rename them to ethtool_convert_... and export them, so we could use them in others drivers and modules. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 +++++++ net/core/ethtool.c | 21 ++++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e2b7bf27c03e..9ded8c6d8176 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -150,6 +150,13 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32); + +/* return false if src had higher bits set. lower bits always updated. */ +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src); + /** * struct ethtool_ops - optional netdev operations * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e0cf20a3b3dd..bdb4013581b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -391,15 +391,17 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32) { bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); dst[0] = legacy_u32; } +EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); /* return false if src had higher bits set. lower bits always updated. */ -static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, - const unsigned long *src) +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src) { bool retval = true; @@ -419,6 +421,7 @@ static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, *legacy_u32 = src[0]; return retval; } +EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated @@ -441,13 +444,13 @@ convert_legacy_settings_to_link_ksettings( legacy_settings->maxrxpkt) retval = false; - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.supported, legacy_settings->supported); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.advertising, legacy_settings->advertising); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.lp_advertising, legacy_settings->lp_advertising); link_ksettings->base.speed @@ -486,13 +489,13 @@ convert_link_ksettings_to_legacy_settings( * __u32 maxrxpkt; */ - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->supported, link_ksettings->link_modes.supported); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->advertising, link_ksettings->link_modes.advertising); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->lp_advertising, link_ksettings->link_modes.lp_advertising); ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); -- cgit v1.2.3 From 2d55173e71b06c5a369489852d972304e14189fd Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Fri, 15 Apr 2016 00:35:00 +0200 Subject: phy: add generic function to support ksetting support The old ethtool api (get_setting and set_setting) has generic phy functions phy_ethtool_sset and phy_ethtool_gset. To supprt the new ethtool api (get_link_ksettings and set_link_ksettings), we add generic phy function phy_ethtool_ksettings_get and phy_ethtool_ksettings_set. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 4 +++ 2 files changed, 85 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 5590b9c182c9..6f221c8c2a7f 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -362,6 +362,60 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) } EXPORT_SYMBOL(phy_ethtool_sset); +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd) +{ + u8 autoneg = cmd->base.autoneg; + u8 duplex = cmd->base.duplex; + u32 speed = cmd->base.speed; + u32 advertising; + + if (cmd->base.phy_address != phydev->mdio.addr) + return -EINVAL; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + /* We make sure that we don't pass unsupported values in to the PHY */ + advertising &= phydev->supported; + + /* Verify the settings we care about. */ + if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (autoneg == AUTONEG_ENABLE && advertising == 0) + return -EINVAL; + + if (autoneg == AUTONEG_DISABLE && + ((speed != SPEED_1000 && + speed != SPEED_100 && + speed != SPEED_10) || + (duplex != DUPLEX_HALF && + duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = autoneg; + + phydev->speed = speed; + + phydev->advertising = advertising; + + if (autoneg == AUTONEG_ENABLE) + phydev->advertising |= ADVERTISED_Autoneg; + else + phydev->advertising &= ~ADVERTISED_Autoneg; + + phydev->duplex = duplex; + + phydev->mdix = cmd->base.eth_tp_mdix_ctrl; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_set); + int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) { cmd->supported = phydev->supported; @@ -385,6 +439,33 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) } EXPORT_SYMBOL(phy_ethtool_gset); +int phy_ethtool_ksettings_get(struct phy_device *phydev, + struct ethtool_link_ksettings *cmd) +{ + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + phydev->supported); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + phydev->advertising); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + phydev->lp_advertising); + + cmd->base.speed = phydev->speed; + cmd->base.duplex = phydev->duplex; + if (phydev->interface == PHY_INTERFACE_MODE_MOCA) + cmd->base.port = PORT_BNC; + else + cmd->base.port = PORT_MII; + + cmd->base.phy_address = phydev->mdio.addr; + cmd->base.autoneg = phydev->autoneg; + cmd->base.eth_tp_mdix_ctrl = phydev->mdix; + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_get); + /** * phy_mii_ioctl - generic PHY MII ioctl interface * @phydev: the phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 2abd7918f64f..be3f83bbdc0b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -805,6 +805,10 @@ void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_ksettings_get(struct phy_device *phydev, + struct ethtool_link_ksettings *cmd); +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -- cgit v1.2.3 From 5ae74f2cc2f150c1a5cee54cabbd71dd0661285a Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 11 Apr 2016 10:13:18 +0800 Subject: ACPI / tables: Move table override mechanisms to tables.c This patch moves acpi_os_table_override() and acpi_os_physical_table_override() to tables.c. Along with the mechanisms, acpi_initrd_initialize_tables() is also moved to tables.c to form a static function. The following functions are renamed according to this change: 1. acpi_initrd_override() -> renamed to early_acpi_table_init(), which invokes acpi_table_initrd_init() 2. acpi_os_physical_table_override() -> which invokes acpi_table_initrd_override() 3. acpi_initialize_initrd_tables() -> renamed to acpi_table_initrd_scan() Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/setup.c | 2 +- drivers/acpi/internal.h | 1 - drivers/acpi/osl.c | 274 --------------------------------------------- drivers/acpi/tables.c | 292 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/acpi.h | 10 +- 5 files changed, 294 insertions(+), 285 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2367ae07eb76..902a6f7b1c04 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1139,7 +1139,7 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); #if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) - acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); + early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start); #endif vsmp_init(); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 7c188472d9c2..1b0e6fd6b280 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -20,7 +20,6 @@ #define PREFIX "ACPI: " -void acpi_initrd_initialize_tables(void); acpi_status acpi_os_initialize1(void); void init_acpi_device_notify(void); int acpi_scan_init(void); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 814d5f83b75e..0796ad96dc32 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -602,280 +602,6 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, return AE_OK; } -static void acpi_table_taint(struct acpi_table_header *table) -{ - pr_warn(PREFIX - "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", - table->signature, table->oem_table_id); - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); -} - -#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE -#include -#include - -static u64 acpi_tables_addr; -static int all_tables_size; - -/* Copied from acpica/tbutils.c:acpi_tb_checksum() */ -static u8 __init acpi_table_checksum(u8 *buffer, u32 length) -{ - u8 sum = 0; - u8 *end = buffer + length; - - while (buffer < end) - sum = (u8) (sum + *(buffer++)); - return sum; -} - -/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char * const table_sigs[] = { - ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, - ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, - ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF, - ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET, - ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI, - ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA, - ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, - ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, - ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL }; - -#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) - -#define ACPI_OVERRIDE_TABLES 64 -static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES]; -static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES); - -#define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT) - -void __init acpi_initrd_override(void *data, size_t size) -{ - int sig, no, table_nr = 0, total_offset = 0; - long offset = 0; - struct acpi_table_header *table; - char cpio_path[32] = "kernel/firmware/acpi/"; - struct cpio_data file; - - if (data == NULL || size == 0) - return; - - for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) { - file = find_cpio_data(cpio_path, data, size, &offset); - if (!file.data) - break; - - data += offset; - size -= offset; - - if (file.size < sizeof(struct acpi_table_header)) { - pr_err("ACPI OVERRIDE: Table smaller than ACPI header [%s%s]\n", - cpio_path, file.name); - continue; - } - - table = file.data; - - for (sig = 0; table_sigs[sig]; sig++) - if (!memcmp(table->signature, table_sigs[sig], 4)) - break; - - if (!table_sigs[sig]) { - pr_err("ACPI OVERRIDE: Unknown signature [%s%s]\n", - cpio_path, file.name); - continue; - } - if (file.size != table->length) { - pr_err("ACPI OVERRIDE: File length does not match table length [%s%s]\n", - cpio_path, file.name); - continue; - } - if (acpi_table_checksum(file.data, table->length)) { - pr_err("ACPI OVERRIDE: Bad table checksum [%s%s]\n", - cpio_path, file.name); - continue; - } - - pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n", - table->signature, cpio_path, file.name, table->length); - - all_tables_size += table->length; - acpi_initrd_files[table_nr].data = file.data; - acpi_initrd_files[table_nr].size = file.size; - table_nr++; - } - if (table_nr == 0) - return; - - acpi_tables_addr = - memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT, - all_tables_size, PAGE_SIZE); - if (!acpi_tables_addr) { - WARN_ON(1); - return; - } - /* - * Only calling e820_add_reserve does not work and the - * tables are invalid (memory got used) later. - * memblock_reserve works as expected and the tables won't get modified. - * But it's not enough on X86 because ioremap will - * complain later (used by acpi_os_map_memory) that the pages - * that should get mapped are not marked "reserved". - * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area) - * works fine. - */ - memblock_reserve(acpi_tables_addr, all_tables_size); - arch_reserve_mem_area(acpi_tables_addr, all_tables_size); - - /* - * early_ioremap only can remap 256k one time. If we map all - * tables one time, we will hit the limit. Need to map chunks - * one by one during copying the same as that in relocate_initrd(). - */ - for (no = 0; no < table_nr; no++) { - unsigned char *src_p = acpi_initrd_files[no].data; - phys_addr_t size = acpi_initrd_files[no].size; - phys_addr_t dest_addr = acpi_tables_addr + total_offset; - phys_addr_t slop, clen; - char *dest_p; - - total_offset += size; - - while (size) { - slop = dest_addr & ~PAGE_MASK; - clen = size; - if (clen > MAP_CHUNK_SIZE - slop) - clen = MAP_CHUNK_SIZE - slop; - dest_p = early_ioremap(dest_addr & PAGE_MASK, - clen + slop); - memcpy(dest_p + slop, src_p, clen); - early_iounmap(dest_p, clen + slop); - src_p += clen; - dest_addr += clen; - size -= clen; - } - } -} - -acpi_status -acpi_os_physical_table_override(struct acpi_table_header *existing_table, - acpi_physical_address *address, u32 *length) -{ - int table_offset = 0; - int table_index = 0; - struct acpi_table_header *table; - u32 table_length; - - *length = 0; - *address = 0; - if (!acpi_tables_addr) - return AE_OK; - - while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { - table = acpi_os_map_memory(acpi_tables_addr + table_offset, - ACPI_HEADER_SIZE); - if (table_offset + table->length > all_tables_size) { - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - WARN_ON(1); - return AE_OK; - } - - table_length = table->length; - - /* Only override tables matched */ - if (test_bit(table_index, acpi_initrd_installed) || - memcmp(existing_table->signature, table->signature, 4) || - memcmp(table->oem_table_id, existing_table->oem_table_id, - ACPI_OEM_TABLE_ID_SIZE)) { - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - goto next_table; - } - - *length = table_length; - *address = acpi_tables_addr + table_offset; - acpi_table_taint(existing_table); - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - set_bit(table_index, acpi_initrd_installed); - break; - -next_table: - table_offset += table_length; - table_index++; - } - return AE_OK; -} - -void __init acpi_initrd_initialize_tables(void) -{ - int table_offset = 0; - int table_index = 0; - u32 table_length; - struct acpi_table_header *table; - - if (!acpi_tables_addr) - return; - - while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { - table = acpi_os_map_memory(acpi_tables_addr + table_offset, - ACPI_HEADER_SIZE); - if (table_offset + table->length > all_tables_size) { - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - WARN_ON(1); - return; - } - - table_length = table->length; - - /* Skip RSDT/XSDT which should only be used for override */ - if (test_bit(table_index, acpi_initrd_installed) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) { - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - goto next_table; - } - - acpi_table_taint(table); - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - acpi_install_table(acpi_tables_addr + table_offset, TRUE); - set_bit(table_index, acpi_initrd_installed); -next_table: - table_offset += table_length; - table_index++; - } -} -#else -acpi_status -acpi_os_physical_table_override(struct acpi_table_header *existing_table, - acpi_physical_address *address, - u32 *table_length) -{ - *table_length = 0; - *address = 0; - return AE_OK; -} - -void __init acpi_initrd_initialize_tables(void) -{ -} -#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ - -acpi_status -acpi_os_table_override(struct acpi_table_header *existing_table, - struct acpi_table_header **new_table) -{ - if (!existing_table || !new_table) - return AE_BAD_PARAMETER; - - *new_table = NULL; - -#ifdef CONFIG_ACPI_CUSTOM_DSDT - if (strncmp(existing_table->signature, "DSDT", 4) == 0) - *new_table = (struct acpi_table_header *)AmlCode; -#endif - if (*new_table != NULL) - acpi_table_taint(existing_table); - return AE_OK; -} - static irqreturn_t acpi_irq(int irq, void *dev_id) { u32 handled; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index f49c02442d65..2e74dbf45dd4 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include "internal.h" #define ACPI_MAX_TABLES 128 @@ -433,6 +435,294 @@ static void __init check_multiple_madt(void) return; } +static void acpi_table_taint(struct acpi_table_header *table) +{ + pr_warn("Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", + table->signature, table->oem_table_id); + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); +} + +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +static u64 acpi_tables_addr; +static int all_tables_size; + +/* Copied from acpica/tbutils.c:acpi_tb_checksum() */ +static u8 __init acpi_table_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + return sum; +} + +/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ +static const char * const table_sigs[] = { + ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, + ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, + ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF, + ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET, + ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI, + ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA, + ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, + ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, + ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL }; + +#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) + +#define ACPI_OVERRIDE_TABLES 64 +static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES]; +static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES); + +#define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT) + +static void __init acpi_table_initrd_init(void *data, size_t size) +{ + int sig, no, table_nr = 0, total_offset = 0; + long offset = 0; + struct acpi_table_header *table; + char cpio_path[32] = "kernel/firmware/acpi/"; + struct cpio_data file; + + if (data == NULL || size == 0) + return; + + for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) { + file = find_cpio_data(cpio_path, data, size, &offset); + if (!file.data) + break; + + data += offset; + size -= offset; + + if (file.size < sizeof(struct acpi_table_header)) { + pr_err("ACPI OVERRIDE: Table smaller than ACPI header [%s%s]\n", + cpio_path, file.name); + continue; + } + + table = file.data; + + for (sig = 0; table_sigs[sig]; sig++) + if (!memcmp(table->signature, table_sigs[sig], 4)) + break; + + if (!table_sigs[sig]) { + pr_err("ACPI OVERRIDE: Unknown signature [%s%s]\n", + cpio_path, file.name); + continue; + } + if (file.size != table->length) { + pr_err("ACPI OVERRIDE: File length does not match table length [%s%s]\n", + cpio_path, file.name); + continue; + } + if (acpi_table_checksum(file.data, table->length)) { + pr_err("ACPI OVERRIDE: Bad table checksum [%s%s]\n", + cpio_path, file.name); + continue; + } + + pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n", + table->signature, cpio_path, file.name, table->length); + + all_tables_size += table->length; + acpi_initrd_files[table_nr].data = file.data; + acpi_initrd_files[table_nr].size = file.size; + table_nr++; + } + if (table_nr == 0) + return; + + acpi_tables_addr = + memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT, + all_tables_size, PAGE_SIZE); + if (!acpi_tables_addr) { + WARN_ON(1); + return; + } + /* + * Only calling e820_add_reserve does not work and the + * tables are invalid (memory got used) later. + * memblock_reserve works as expected and the tables won't get modified. + * But it's not enough on X86 because ioremap will + * complain later (used by acpi_os_map_memory) that the pages + * that should get mapped are not marked "reserved". + * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area) + * works fine. + */ + memblock_reserve(acpi_tables_addr, all_tables_size); + arch_reserve_mem_area(acpi_tables_addr, all_tables_size); + + /* + * early_ioremap only can remap 256k one time. If we map all + * tables one time, we will hit the limit. Need to map chunks + * one by one during copying the same as that in relocate_initrd(). + */ + for (no = 0; no < table_nr; no++) { + unsigned char *src_p = acpi_initrd_files[no].data; + phys_addr_t size = acpi_initrd_files[no].size; + phys_addr_t dest_addr = acpi_tables_addr + total_offset; + phys_addr_t slop, clen; + char *dest_p; + + total_offset += size; + + while (size) { + slop = dest_addr & ~PAGE_MASK; + clen = size; + if (clen > MAP_CHUNK_SIZE - slop) + clen = MAP_CHUNK_SIZE - slop; + dest_p = early_ioremap(dest_addr & PAGE_MASK, + clen + slop); + memcpy(dest_p + slop, src_p, clen); + early_iounmap(dest_p, clen + slop); + src_p += clen; + dest_addr += clen; + size -= clen; + } + } +} + +static acpi_status +acpi_table_initrd_override(struct acpi_table_header *existing_table, + acpi_physical_address *address, u32 *length) +{ + int table_offset = 0; + int table_index = 0; + struct acpi_table_header *table; + u32 table_length; + + *length = 0; + *address = 0; + if (!acpi_tables_addr) + return AE_OK; + + while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { + table = acpi_os_map_memory(acpi_tables_addr + table_offset, + ACPI_HEADER_SIZE); + if (table_offset + table->length > all_tables_size) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + WARN_ON(1); + return AE_OK; + } + + table_length = table->length; + + /* Only override tables matched */ + if (test_bit(table_index, acpi_initrd_installed) || + memcmp(existing_table->signature, table->signature, 4) || + memcmp(table->oem_table_id, existing_table->oem_table_id, + ACPI_OEM_TABLE_ID_SIZE)) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + goto next_table; + } + + *length = table_length; + *address = acpi_tables_addr + table_offset; + acpi_table_taint(existing_table); + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + set_bit(table_index, acpi_initrd_installed); + break; + +next_table: + table_offset += table_length; + table_index++; + } + return AE_OK; +} + +static void __init acpi_table_initrd_scan(void) +{ + int table_offset = 0; + int table_index = 0; + u32 table_length; + struct acpi_table_header *table; + + if (!acpi_tables_addr) + return; + + while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { + table = acpi_os_map_memory(acpi_tables_addr + table_offset, + ACPI_HEADER_SIZE); + if (table_offset + table->length > all_tables_size) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + WARN_ON(1); + return; + } + + table_length = table->length; + + /* Skip RSDT/XSDT which should only be used for override */ + if (test_bit(table_index, acpi_initrd_installed) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + goto next_table; + } + + acpi_table_taint(table); + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + acpi_install_table(acpi_tables_addr + table_offset, TRUE); + set_bit(table_index, acpi_initrd_installed); +next_table: + table_offset += table_length; + table_index++; + } +} +#else +static void __init acpi_table_initrd_init(void *data, size_t size) +{ +} + +static acpi_status +acpi_table_initrd_override(struct acpi_table_header *existing_table, + acpi_physical_address *address, + u32 *table_length) +{ + *table_length = 0; + *address = 0; + return AE_OK; +} + +static void __init acpi_table_initrd_scan(void) +{ +} +#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ + +acpi_status +acpi_os_physical_table_override(struct acpi_table_header *existing_table, + acpi_physical_address *address, + u32 *table_length) +{ + return acpi_table_initrd_override(existing_table, address, + table_length); +} + +acpi_status +acpi_os_table_override(struct acpi_table_header *existing_table, + struct acpi_table_header **new_table) +{ + if (!existing_table || !new_table) + return AE_BAD_PARAMETER; + + *new_table = NULL; + +#ifdef CONFIG_ACPI_CUSTOM_DSDT + if (strncmp(existing_table->signature, "DSDT", 4) == 0) + *new_table = (struct acpi_table_header *)AmlCode; +#endif + if (*new_table != NULL) + acpi_table_taint(existing_table); + return AE_OK; +} + +void __init early_acpi_table_init(void *data, size_t size) +{ + acpi_table_initrd_init(data, size); +} + /* * acpi_table_init() * @@ -457,7 +747,7 @@ int __init acpi_table_init(void) status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0); if (ACPI_FAILURE(status)) return -EINVAL; - acpi_initrd_initialize_tables(); + acpi_table_initrd_scan(); check_multiple_madt(); return 0; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 06ed7e54033e..7718c04aa09f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -190,14 +190,6 @@ static inline int acpi_debugger_notify_command_complete(void) } #endif -#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE -void acpi_initrd_override(void *data, size_t size); -#else -static inline void acpi_initrd_override(void *data, size_t size) -{ -} -#endif - #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) @@ -216,6 +208,7 @@ void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); +void early_acpi_table_init(void *data, size_t size); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init acpi_parse_entries(char *id, unsigned long table_size, @@ -596,6 +589,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) return NULL; } +static inline void early_acpi_table_init(void *data, size_t size) { } static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } -- cgit v1.2.3 From 20147f0d4f50f6f0d1fbe1815fe3d4d0a6444a70 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 29 Mar 2016 17:22:26 +0800 Subject: mfd: axp20x: Add support for AXP809 PMIC The X-Powers AXP809 is a new PMIC that is paired with Allwinner's A80 SoC, along with a slave AXP806 PMIC. This PMIC is quite similar to the earlier AXP223, though the interrupts and regulator have changed a bit. This patch adds support for the interrupts and power button of the PMIC. Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- drivers/mfd/axp20x-rsb.c | 1 + drivers/mfd/axp20x.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/axp20x.h | 59 ++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/axp20x-rsb.c b/drivers/mfd/axp20x-rsb.c index 28c20247c112..a407527bcd09 100644 --- a/drivers/mfd/axp20x-rsb.c +++ b/drivers/mfd/axp20x-rsb.c @@ -61,6 +61,7 @@ static int axp20x_rsb_remove(struct sunxi_rsb_device *rdev) static const struct of_device_id axp20x_rsb_of_match[] = { { .compatible = "x-powers,axp223", .data = (void *)AXP223_ID }, + { .compatible = "x-powers,axp809", .data = (void *)AXP809_ID }, { }, }; MODULE_DEVICE_TABLE(of, axp20x_rsb_of_match); diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index a57d6e940610..1ce923277cc8 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -37,6 +37,7 @@ static const char * const axp20x_model_names[] = { "AXP221", "AXP223", "AXP288", + "AXP809", }; static const struct regmap_range axp152_writeable_ranges[] = { @@ -85,6 +86,7 @@ static const struct regmap_access_table axp20x_volatile_table = { .n_yes_ranges = ARRAY_SIZE(axp20x_volatile_ranges), }; +/* AXP22x ranges are shared with the AXP809, as they cover the same range */ static const struct regmap_range axp22x_writeable_ranges[] = { regmap_reg_range(AXP20X_DATACACHE(0), AXP20X_IRQ5_STATE), regmap_reg_range(AXP20X_DCDC_MODE, AXP22X_BATLOW_THRES1), @@ -211,6 +213,20 @@ static struct resource axp288_fuel_gauge_resources[] = { }, }; +static struct resource axp809_pek_resources[] = { + { + .name = "PEK_DBR", + .start = AXP809_IRQ_PEK_RIS_EDGE, + .end = AXP809_IRQ_PEK_RIS_EDGE, + .flags = IORESOURCE_IRQ, + }, { + .name = "PEK_DBF", + .start = AXP809_IRQ_PEK_FAL_EDGE, + .end = AXP809_IRQ_PEK_FAL_EDGE, + .flags = IORESOURCE_IRQ, + }, +}; + static const struct regmap_config axp152_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -378,6 +394,41 @@ static const struct regmap_irq axp288_regmap_irqs[] = { INIT_REGMAP_IRQ(AXP288, BC_USB_CHNG, 5, 1), }; +static const struct regmap_irq axp809_regmap_irqs[] = { + INIT_REGMAP_IRQ(AXP809, ACIN_OVER_V, 0, 7), + INIT_REGMAP_IRQ(AXP809, ACIN_PLUGIN, 0, 6), + INIT_REGMAP_IRQ(AXP809, ACIN_REMOVAL, 0, 5), + INIT_REGMAP_IRQ(AXP809, VBUS_OVER_V, 0, 4), + INIT_REGMAP_IRQ(AXP809, VBUS_PLUGIN, 0, 3), + INIT_REGMAP_IRQ(AXP809, VBUS_REMOVAL, 0, 2), + INIT_REGMAP_IRQ(AXP809, VBUS_V_LOW, 0, 1), + INIT_REGMAP_IRQ(AXP809, BATT_PLUGIN, 1, 7), + INIT_REGMAP_IRQ(AXP809, BATT_REMOVAL, 1, 6), + INIT_REGMAP_IRQ(AXP809, BATT_ENT_ACT_MODE, 1, 5), + INIT_REGMAP_IRQ(AXP809, BATT_EXIT_ACT_MODE, 1, 4), + INIT_REGMAP_IRQ(AXP809, CHARG, 1, 3), + INIT_REGMAP_IRQ(AXP809, CHARG_DONE, 1, 2), + INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_HIGH, 2, 7), + INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_HIGH_END, 2, 6), + INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_LOW, 2, 5), + INIT_REGMAP_IRQ(AXP809, BATT_CHG_TEMP_LOW_END, 2, 4), + INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_HIGH, 2, 3), + INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_HIGH_END, 2, 2), + INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_LOW, 2, 1), + INIT_REGMAP_IRQ(AXP809, BATT_ACT_TEMP_LOW_END, 2, 0), + INIT_REGMAP_IRQ(AXP809, DIE_TEMP_HIGH, 3, 7), + INIT_REGMAP_IRQ(AXP809, LOW_PWR_LVL1, 3, 1), + INIT_REGMAP_IRQ(AXP809, LOW_PWR_LVL2, 3, 0), + INIT_REGMAP_IRQ(AXP809, TIMER, 4, 7), + INIT_REGMAP_IRQ(AXP809, PEK_RIS_EDGE, 4, 6), + INIT_REGMAP_IRQ(AXP809, PEK_FAL_EDGE, 4, 5), + INIT_REGMAP_IRQ(AXP809, PEK_SHORT, 4, 4), + INIT_REGMAP_IRQ(AXP809, PEK_LONG, 4, 3), + INIT_REGMAP_IRQ(AXP809, PEK_OVER_OFF, 4, 2), + INIT_REGMAP_IRQ(AXP809, GPIO1_INPUT, 4, 1), + INIT_REGMAP_IRQ(AXP809, GPIO0_INPUT, 4, 0), +}; + static const struct regmap_irq_chip axp152_regmap_irq_chip = { .name = "axp152_irq_chip", .status_base = AXP152_IRQ1_STATE, @@ -428,6 +479,18 @@ static const struct regmap_irq_chip axp288_regmap_irq_chip = { }; +static const struct regmap_irq_chip axp809_regmap_irq_chip = { + .name = "axp809", + .status_base = AXP20X_IRQ1_STATE, + .ack_base = AXP20X_IRQ1_STATE, + .mask_base = AXP20X_IRQ1_EN, + .mask_invert = true, + .init_ack_masked = true, + .irqs = axp809_regmap_irqs, + .num_irqs = ARRAY_SIZE(axp809_regmap_irqs), + .num_regs = 5, +}; + static struct mfd_cell axp20x_cells[] = { { .name = "axp20x-pek", @@ -572,6 +635,16 @@ static struct mfd_cell axp288_cells[] = { }, }; +static struct mfd_cell axp809_cells[] = { + { + .name = "axp20x-pek", + .num_resources = ARRAY_SIZE(axp809_pek_resources), + .resources = axp809_pek_resources, + }, { + .name = "axp20x-regulator", + }, +}; + static struct axp20x_dev *axp20x_pm_power_off; static void axp20x_power_off(void) { @@ -631,6 +704,12 @@ int axp20x_match_device(struct axp20x_dev *axp20x) axp20x->regmap_cfg = &axp288_regmap_config; axp20x->regmap_irq_chip = &axp288_regmap_irq_chip; break; + case AXP809_ID: + axp20x->nr_cells = ARRAY_SIZE(axp809_cells); + axp20x->cells = axp809_cells; + axp20x->regmap_cfg = &axp22x_regmap_config; + axp20x->regmap_irq_chip = &axp809_regmap_irq_chip; + break; default: dev_err(dev, "unsupported AXP20X ID %lu\n", axp20x->variant); return -EINVAL; diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index d82e7d51372b..0be4982f08fe 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -20,6 +20,7 @@ enum { AXP221_ID, AXP223_ID, AXP288_ID, + AXP809_ID, NR_AXP20X_VARIANTS, }; @@ -264,6 +265,29 @@ enum { AXP22X_REG_ID_MAX, }; +enum { + AXP809_DCDC1 = 0, + AXP809_DCDC2, + AXP809_DCDC3, + AXP809_DCDC4, + AXP809_DCDC5, + AXP809_DC1SW, + AXP809_DC5LDO, + AXP809_ALDO1, + AXP809_ALDO2, + AXP809_ALDO3, + AXP809_ELDO1, + AXP809_ELDO2, + AXP809_ELDO3, + AXP809_DLDO1, + AXP809_DLDO2, + AXP809_RTC_LDO, + AXP809_LDO_IO0, + AXP809_LDO_IO1, + AXP809_SW, + AXP809_REG_ID_MAX, +}; + /* IRQs */ enum { AXP152_IRQ_LDO0IN_CONNECT = 1, @@ -390,6 +414,41 @@ enum axp288_irqs { AXP288_IRQ_BC_USB_CHNG, }; +enum axp809_irqs { + AXP809_IRQ_ACIN_OVER_V = 1, + AXP809_IRQ_ACIN_PLUGIN, + AXP809_IRQ_ACIN_REMOVAL, + AXP809_IRQ_VBUS_OVER_V, + AXP809_IRQ_VBUS_PLUGIN, + AXP809_IRQ_VBUS_REMOVAL, + AXP809_IRQ_VBUS_V_LOW, + AXP809_IRQ_BATT_PLUGIN, + AXP809_IRQ_BATT_REMOVAL, + AXP809_IRQ_BATT_ENT_ACT_MODE, + AXP809_IRQ_BATT_EXIT_ACT_MODE, + AXP809_IRQ_CHARG, + AXP809_IRQ_CHARG_DONE, + AXP809_IRQ_BATT_CHG_TEMP_HIGH, + AXP809_IRQ_BATT_CHG_TEMP_HIGH_END, + AXP809_IRQ_BATT_CHG_TEMP_LOW, + AXP809_IRQ_BATT_CHG_TEMP_LOW_END, + AXP809_IRQ_BATT_ACT_TEMP_HIGH, + AXP809_IRQ_BATT_ACT_TEMP_HIGH_END, + AXP809_IRQ_BATT_ACT_TEMP_LOW, + AXP809_IRQ_BATT_ACT_TEMP_LOW_END, + AXP809_IRQ_DIE_TEMP_HIGH, + AXP809_IRQ_LOW_PWR_LVL1, + AXP809_IRQ_LOW_PWR_LVL2, + AXP809_IRQ_TIMER, + AXP809_IRQ_PEK_RIS_EDGE, + AXP809_IRQ_PEK_FAL_EDGE, + AXP809_IRQ_PEK_SHORT, + AXP809_IRQ_PEK_LONG, + AXP809_IRQ_PEK_OVER_OFF, + AXP809_IRQ_GPIO1_INPUT, + AXP809_IRQ_GPIO0_INPUT, +}; + #define AXP288_TS_ADC_H 0x58 #define AXP288_TS_ADC_L 0x59 #define AXP288_GP_ADC_H 0x5a -- cgit v1.2.3 From a8f447be8056d9ce17bf7757d6de79426700bb8b Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Fri, 8 Apr 2016 00:12:55 +0530 Subject: mfd: Add resource managed APIs for mfd_add_devices Add resource managed API devm_mfd_add_devices() for the mfd_add_devices(). This helps in reducing code in error path as it is not required to call mfd_remove_devices() explicitly to remove all child-devices. In some cases, it also helps not to implement .remove() callback which get called during driver unbind. Signed-off-by: Laxman Dewangan Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/core.h | 4 ++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 409da01effcd..4b4c1d4f3280 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -334,6 +334,44 @@ void mfd_remove_devices(struct device *parent) } EXPORT_SYMBOL(mfd_remove_devices); +static void devm_mfd_dev_release(struct device *dev, void *res) +{ + mfd_remove_devices(dev); +} + +/** + * devm_mfd_add_devices - Resource managed version of mfd_add_devices() + * + * Returns 0 on success or an appropriate negative error number on failure. + * All child-devices of the MFD will automatically be removed when it gets + * unbinded. + */ +int devm_mfd_add_devices(struct device *dev, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base, struct irq_domain *domain) +{ + struct device **ptr; + int ret; + + ptr = devres_alloc(devm_mfd_dev_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = mfd_add_devices(dev, id, cells, n_devs, mem_base, + irq_base, domain); + if (ret < 0) { + devres_free(ptr); + return ret; + } + + *ptr = dev; + devres_add(dev, ptr); + + return ret; +} +EXPORT_SYMBOL(devm_mfd_add_devices); + int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones) { struct mfd_cell cell_entry; diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bc6f7e00fb3d..4a0268afe546 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -131,4 +131,8 @@ static inline int mfd_add_hotplug_devices(struct device *parent, extern void mfd_remove_devices(struct device *parent); +extern int devm_mfd_add_devices(struct device *dev, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base, struct irq_domain *irq_domain); #endif -- cgit v1.2.3 From 679ca39fc670a5a95c2b40d2cc8cf2cee2486f7a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 18 Apr 2016 16:53:39 +0900 Subject: usb: gadget: udc: core: add usb_gadget_{un}map_request_by_dev() If the following environment, the first argument of DMA API should be set to a DMAC's device structure, not a udc controller's one. - A udc controller needs an external DMAC device (like a DMA Engine). - The external DMAC enables IOMMU. So, this patch add usb_gadget_{un}map_request_by_dev() API to set a DMAC's device structure by a udc controller driver. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/udc-core.c | 24 ++++++++++++++++++------ include/linux/usb/gadget.h | 4 ++++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index c6e76465065a..6e8300d6a737 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -61,11 +61,9 @@ static int udc_bind_to_driver(struct usb_udc *udc, #ifdef CONFIG_HAS_DMA -int usb_gadget_map_request(struct usb_gadget *gadget, +int usb_gadget_map_request_by_dev(struct device *dev, struct usb_request *req, int is_in) { - struct device *dev = gadget->dev.parent; - if (req->length == 0) return 0; @@ -92,24 +90,38 @@ int usb_gadget_map_request(struct usb_gadget *gadget, return 0; } +EXPORT_SYMBOL_GPL(usb_gadget_map_request_by_dev); + +int usb_gadget_map_request(struct usb_gadget *gadget, + struct usb_request *req, int is_in) +{ + return usb_gadget_map_request_by_dev(gadget->dev.parent, req, is_in); +} EXPORT_SYMBOL_GPL(usb_gadget_map_request); -void usb_gadget_unmap_request(struct usb_gadget *gadget, +void usb_gadget_unmap_request_by_dev(struct device *dev, struct usb_request *req, int is_in) { if (req->length == 0) return; if (req->num_mapped_sgs) { - dma_unmap_sg(gadget->dev.parent, req->sg, req->num_mapped_sgs, + dma_unmap_sg(dev, req->sg, req->num_mapped_sgs, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); req->num_mapped_sgs = 0; } else { - dma_unmap_single(gadget->dev.parent, req->dma, req->length, + dma_unmap_single(dev, req->dma, req->length, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); } } +EXPORT_SYMBOL_GPL(usb_gadget_unmap_request_by_dev); + +void usb_gadget_unmap_request(struct usb_gadget *gadget, + struct usb_request *req, int is_in) +{ + usb_gadget_unmap_request_by_dev(gadget->dev.parent, req, is_in); +} EXPORT_SYMBOL_GPL(usb_gadget_unmap_request); #endif /* CONFIG_HAS_DMA */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 5d4e151c49bf..457651bf45b0 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1223,9 +1223,13 @@ int usb_otg_descriptor_init(struct usb_gadget *gadget, /* utility to simplify map/unmap of usb_requests to/from DMA */ +extern int usb_gadget_map_request_by_dev(struct device *dev, + struct usb_request *req, int is_in); extern int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); +extern void usb_gadget_unmap_request_by_dev(struct device *dev, + struct usb_request *req, int is_in); extern void usb_gadget_unmap_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); -- cgit v1.2.3 From 7ef224d1d0e3a1ade02d02c01ce1dcffb736d2c3 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 3 Mar 2016 12:54:42 -0600 Subject: tracing: Add 'hist' event trigger command 'hist' triggers allow users to continually aggregate trace events, which can then be viewed afterwards by simply reading a 'hist' file containing the aggregation in a human-readable format. The basic idea is very simple and boils down to a mechanism whereby trace events, rather than being exhaustively dumped in raw form and viewed directly, are automatically 'compressed' into meaningful tables completely defined by the user. This is done strictly via single-line command-line commands and without the aid of any kind of programming language or interpreter. A surprising number of typical use cases can be accomplished by users via this simple mechanism. In fact, a large number of the tasks that users typically do using the more complicated script-based tracing tools, at least during the initial stages of an investigation, can be accomplished by simply specifying a set of keys and values to be used in the creation of a hash table. The Linux kernel trace event subsystem happens to provide an extensive list of keys and values ready-made for such a purpose in the form of the event format files associated with each trace event. By simply consulting the format file for field names of interest and by plugging them into the hist trigger command, users can create an endless number of useful aggregations to help with investigating various properties of the system. See Documentation/trace/events.txt for examples. hist triggers are implemented on top of the existing event trigger infrastructure, and as such are consistent with the existing triggers from a user's perspective as well. The basic syntax follows the existing trigger syntax. Users start an aggregation by writing a 'hist' trigger to the event of interest's trigger file: # echo hist:keys=xxx [ if filter] > event/trigger Once a hist trigger has been set up, by default it continually aggregates every matching event into a hash table using the event key and a value field named 'hitcount'. To view the aggregation at any point in time, simply read the 'hist' file in the same directory as the 'trigger' file: # cat event/hist The detailed syntax provides additional options for user control, and is described exhaustively in Documentation/trace/events.txt and in the virtual tracing/README file in the tracing subsystem. Link: http://lkml.kernel.org/r/72d263b5e1853fe9c314953b65833c3aa75479f2.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Tested-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 1 + kernel/trace/Kconfig | 16 + kernel/trace/Makefile | 1 + kernel/trace/trace.c | 17 + kernel/trace/trace.h | 7 + kernel/trace/trace_events.c | 4 + kernel/trace/trace_events_hist.c | 849 ++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_trigger.c | 1 + 8 files changed, 896 insertions(+) create mode 100644 kernel/trace/trace_events_hist.c (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0810f81b6db2..404603720650 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -407,6 +407,7 @@ enum event_trigger_type { ETT_SNAPSHOT = (1 << 1), ETT_STACKTRACE = (1 << 2), ETT_EVENT_ENABLE = (1 << 3), + ETT_EVENT_HIST = (1 << 4), }; extern int filter_match_preds(struct event_filter *filter, void *rec); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d39556fd863a..fafeaf803bd0 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -538,6 +538,22 @@ config TRACING_MAP generally used outside of that context, and is normally selected by tracers that use it. +config HIST_TRIGGERS + bool "Histogram triggers" + depends on ARCH_HAVE_NMI_SAFE_CMPXCHG + select TRACING_MAP + default n + help + Hist triggers allow one or more arbitrary trace event fields + to be aggregated into hash tables and dumped to stdout by + reading a debugfs/tracefs file. They're useful for + gathering quick and dirty (though precise) summaries of + event activity as an initial guide for further investigation + using more advanced tools. + + See Documentation/trace/events.txt. + If in doubt, say N. + config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 4255c4057aaa..979e7bfbde7a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0d12dbde8399..6cf8fd03b028 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3812,6 +3812,9 @@ static const char readme_msg[] = #endif #ifdef CONFIG_TRACER_SNAPSHOT "\t\t snapshot\n" +#endif +#ifdef CONFIG_HIST_TRIGGERS + "\t\t hist (see below)\n" #endif "\t example: echo traceoff > events/block/block_unplug/trigger\n" "\t echo traceoff:3 > events/block/block_unplug/trigger\n" @@ -3828,6 +3831,20 @@ static const char readme_msg[] = "\t To remove a trigger with a count:\n" "\t echo '!:0 > //trigger\n" "\t Filters can be ignored when removing a trigger.\n" +#ifdef CONFIG_HIST_TRIGGERS + " hist trigger\t- If set, event hits are aggregated into a hash table\n" + "\t Format: hist:keys=\n" + "\t [:size=#entries]\n" + "\t [if ]\n\n" + "\t When a matching event is hit, an entry is added to a hash\n" + "\t table using the key named, and the value of a sum called\n" + "\t 'hitcount' is incremented. Keys correspond to fields in the\n" + "\t event's format description. Keys can be any field. The\n" + "\t 'size' parameter can be used to specify more or fewer than\n" + "\t the default 2048 entries for the hashtable size.\n\n" + "\t Reading the 'hist' file for the event will dump the hash\n" + "\t table in its entirety to stdout." +#endif ; static ssize_t diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2525042760e6..505f8a45f426 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1162,6 +1162,13 @@ extern struct mutex event_mutex; extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; +extern const struct file_operations event_hist_fops; + +#ifdef CONFIG_HIST_TRIGGERS +extern int register_trigger_hist_cmd(void); +#else +static inline int register_trigger_hist_cmd(void) { return 0; } +#endif extern int register_trigger_cmds(void); extern void clear_event_triggers(struct trace_array *tr); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index add81dff7520..e7cb983ee93c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2141,6 +2141,10 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) trace_create_file("trigger", 0644, file->dir, file, &event_trigger_fops); +#ifdef CONFIG_HIST_TRIGGERS + trace_create_file("hist", 0444, file->dir, file, + &event_hist_fops); +#endif trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c new file mode 100644 index 000000000000..23b45e462117 --- /dev/null +++ b/kernel/trace/trace_events_hist.c @@ -0,0 +1,849 @@ +/* + * trace_events_hist - trace event hist triggers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 Tom Zanussi + */ + +#include +#include +#include +#include +#include + +#include "tracing_map.h" +#include "trace.h" + +struct hist_field; + +typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event); + +struct hist_field { + struct ftrace_event_field *field; + unsigned long flags; + hist_field_fn_t fn; + unsigned int size; +}; + +static u64 hist_field_counter(struct hist_field *field, void *event) +{ + return 1; +} + +static u64 hist_field_string(struct hist_field *hist_field, void *event) +{ + char *addr = (char *)(event + hist_field->field->offset); + + return (u64)(unsigned long)addr; +} + +#define DEFINE_HIST_FIELD_FN(type) \ +static u64 hist_field_##type(struct hist_field *hist_field, void *event)\ +{ \ + type *addr = (type *)(event + hist_field->field->offset); \ + \ + return (u64)*addr; \ +} + +DEFINE_HIST_FIELD_FN(s64); +DEFINE_HIST_FIELD_FN(u64); +DEFINE_HIST_FIELD_FN(s32); +DEFINE_HIST_FIELD_FN(u32); +DEFINE_HIST_FIELD_FN(s16); +DEFINE_HIST_FIELD_FN(u16); +DEFINE_HIST_FIELD_FN(s8); +DEFINE_HIST_FIELD_FN(u8); + +#define for_each_hist_field(i, hist_data) \ + for ((i) = 0; (i) < (hist_data)->n_fields; (i)++) + +#define for_each_hist_val_field(i, hist_data) \ + for ((i) = 0; (i) < (hist_data)->n_vals; (i)++) + +#define for_each_hist_key_field(i, hist_data) \ + for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++) + +#define HITCOUNT_IDX 0 +#define HIST_KEY_MAX 1 +#define HIST_KEY_SIZE_MAX MAX_FILTER_STR_VAL + +enum hist_field_flags { + HIST_FIELD_FL_HITCOUNT = 1, + HIST_FIELD_FL_KEY = 2, + HIST_FIELD_FL_STRING = 4, +}; + +struct hist_trigger_attrs { + char *keys_str; + unsigned int map_bits; +}; + +struct hist_trigger_data { + struct hist_field *fields[TRACING_MAP_FIELDS_MAX]; + unsigned int n_vals; + unsigned int n_keys; + unsigned int n_fields; + unsigned int key_size; + struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; + unsigned int n_sort_keys; + struct trace_event_file *event_file; + struct hist_trigger_attrs *attrs; + struct tracing_map *map; +}; + +static hist_field_fn_t select_value_fn(int field_size, int field_is_signed) +{ + hist_field_fn_t fn = NULL; + + switch (field_size) { + case 8: + if (field_is_signed) + fn = hist_field_s64; + else + fn = hist_field_u64; + break; + case 4: + if (field_is_signed) + fn = hist_field_s32; + else + fn = hist_field_u32; + break; + case 2: + if (field_is_signed) + fn = hist_field_s16; + else + fn = hist_field_u16; + break; + case 1: + if (field_is_signed) + fn = hist_field_s8; + else + fn = hist_field_u8; + break; + } + + return fn; +} + +static int parse_map_size(char *str) +{ + unsigned long size, map_bits; + int ret; + + strsep(&str, "="); + if (!str) { + ret = -EINVAL; + goto out; + } + + ret = kstrtoul(str, 0, &size); + if (ret) + goto out; + + map_bits = ilog2(roundup_pow_of_two(size)); + if (map_bits < TRACING_MAP_BITS_MIN || + map_bits > TRACING_MAP_BITS_MAX) + ret = -EINVAL; + else + ret = map_bits; + out: + return ret; +} + +static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs) +{ + if (!attrs) + return; + + kfree(attrs->keys_str); + kfree(attrs); +} + +static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) +{ + struct hist_trigger_attrs *attrs; + int ret = 0; + + attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return ERR_PTR(-ENOMEM); + + while (trigger_str) { + char *str = strsep(&trigger_str, ":"); + + if ((strncmp(str, "key=", strlen("key=")) == 0) || + (strncmp(str, "keys=", strlen("keys=")) == 0)) + attrs->keys_str = kstrdup(str, GFP_KERNEL); + else if (strncmp(str, "size=", strlen("size=")) == 0) { + int map_bits = parse_map_size(str); + + if (map_bits < 0) { + ret = map_bits; + goto free; + } + attrs->map_bits = map_bits; + } else { + ret = -EINVAL; + goto free; + } + } + + if (!attrs->keys_str) { + ret = -EINVAL; + goto free; + } + + return attrs; + free: + destroy_hist_trigger_attrs(attrs); + + return ERR_PTR(ret); +} + +static void destroy_hist_field(struct hist_field *hist_field) +{ + kfree(hist_field); +} + +static struct hist_field *create_hist_field(struct ftrace_event_field *field, + unsigned long flags) +{ + struct hist_field *hist_field; + + if (field && is_function_field(field)) + return NULL; + + hist_field = kzalloc(sizeof(struct hist_field), GFP_KERNEL); + if (!hist_field) + return NULL; + + if (flags & HIST_FIELD_FL_HITCOUNT) { + hist_field->fn = hist_field_counter; + goto out; + } + + if (is_string_field(field)) { + flags |= HIST_FIELD_FL_STRING; + hist_field->fn = hist_field_string; + } else { + hist_field->fn = select_value_fn(field->size, + field->is_signed); + if (!hist_field->fn) { + destroy_hist_field(hist_field); + return NULL; + } + } + out: + hist_field->field = field; + hist_field->flags = flags; + + return hist_field; +} + +static void destroy_hist_fields(struct hist_trigger_data *hist_data) +{ + unsigned int i; + + for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) { + if (hist_data->fields[i]) { + destroy_hist_field(hist_data->fields[i]); + hist_data->fields[i] = NULL; + } + } +} + +static int create_hitcount_val(struct hist_trigger_data *hist_data) +{ + hist_data->fields[HITCOUNT_IDX] = + create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT); + if (!hist_data->fields[HITCOUNT_IDX]) + return -ENOMEM; + + hist_data->n_vals++; + + if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) + return -EINVAL; + + return 0; +} + +static int create_val_fields(struct hist_trigger_data *hist_data, + struct trace_event_file *file) +{ + int ret; + + ret = create_hitcount_val(hist_data); + + return ret; +} + +static int create_key_field(struct hist_trigger_data *hist_data, + unsigned int key_idx, + struct trace_event_file *file, + char *field_str) +{ + struct ftrace_event_field *field = NULL; + unsigned long flags = 0; + unsigned int key_size; + int ret = 0; + + if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX)) + return -EINVAL; + + flags |= HIST_FIELD_FL_KEY; + + field = trace_find_event_field(file->event_call, field_str); + if (!field) { + ret = -EINVAL; + goto out; + } + + key_size = field->size; + + hist_data->fields[key_idx] = create_hist_field(field, flags); + if (!hist_data->fields[key_idx]) { + ret = -ENOMEM; + goto out; + } + + key_size = ALIGN(key_size, sizeof(u64)); + hist_data->fields[key_idx]->size = key_size; + hist_data->key_size = key_size; + if (hist_data->key_size > HIST_KEY_SIZE_MAX) { + ret = -EINVAL; + goto out; + } + + hist_data->n_keys++; + + if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX)) + return -EINVAL; + + ret = key_size; + out: + return ret; +} + +static int create_key_fields(struct hist_trigger_data *hist_data, + struct trace_event_file *file) +{ + unsigned int i, n_vals = hist_data->n_vals; + char *fields_str, *field_str; + int ret = -EINVAL; + + fields_str = hist_data->attrs->keys_str; + if (!fields_str) + goto out; + + strsep(&fields_str, "="); + if (!fields_str) + goto out; + + for (i = n_vals; i < n_vals + HIST_KEY_MAX; i++) { + field_str = strsep(&fields_str, ","); + if (!field_str) + break; + ret = create_key_field(hist_data, i, file, field_str); + if (ret < 0) + goto out; + } + if (fields_str) { + ret = -EINVAL; + goto out; + } + ret = 0; + out: + return ret; +} + +static int create_hist_fields(struct hist_trigger_data *hist_data, + struct trace_event_file *file) +{ + int ret; + + ret = create_val_fields(hist_data, file); + if (ret) + goto out; + + ret = create_key_fields(hist_data, file); + if (ret) + goto out; + + hist_data->n_fields = hist_data->n_vals + hist_data->n_keys; + out: + return ret; +} + +static int create_sort_keys(struct hist_trigger_data *hist_data) +{ + int ret = 0; + + hist_data->n_sort_keys = 1; /* sort_keys[0] is always hitcount */ + + return ret; +} + +static void destroy_hist_data(struct hist_trigger_data *hist_data) +{ + destroy_hist_trigger_attrs(hist_data->attrs); + destroy_hist_fields(hist_data); + tracing_map_destroy(hist_data->map); + kfree(hist_data); +} + +static int create_tracing_map_fields(struct hist_trigger_data *hist_data) +{ + struct tracing_map *map = hist_data->map; + struct ftrace_event_field *field; + struct hist_field *hist_field; + unsigned int i, idx; + + for_each_hist_field(i, hist_data) { + hist_field = hist_data->fields[i]; + if (hist_field->flags & HIST_FIELD_FL_KEY) { + tracing_map_cmp_fn_t cmp_fn; + + field = hist_field->field; + + if (is_string_field(field)) + cmp_fn = tracing_map_cmp_string; + else + cmp_fn = tracing_map_cmp_num(field->size, + field->is_signed); + idx = tracing_map_add_key_field(map, 0, cmp_fn); + } else + idx = tracing_map_add_sum_field(map); + + if (idx < 0) + return idx; + } + + return 0; +} + +static struct hist_trigger_data * +create_hist_data(unsigned int map_bits, + struct hist_trigger_attrs *attrs, + struct trace_event_file *file) +{ + struct hist_trigger_data *hist_data; + int ret = 0; + + hist_data = kzalloc(sizeof(*hist_data), GFP_KERNEL); + if (!hist_data) + return ERR_PTR(-ENOMEM); + + hist_data->attrs = attrs; + + ret = create_hist_fields(hist_data, file); + if (ret) + goto free; + + ret = create_sort_keys(hist_data); + if (ret) + goto free; + + hist_data->map = tracing_map_create(map_bits, hist_data->key_size, + NULL, hist_data); + if (IS_ERR(hist_data->map)) { + ret = PTR_ERR(hist_data->map); + hist_data->map = NULL; + goto free; + } + + ret = create_tracing_map_fields(hist_data); + if (ret) + goto free; + + ret = tracing_map_init(hist_data->map); + if (ret) + goto free; + + hist_data->event_file = file; + out: + return hist_data; + free: + hist_data->attrs = NULL; + + destroy_hist_data(hist_data); + + hist_data = ERR_PTR(ret); + + goto out; +} + +static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, + struct tracing_map_elt *elt, + void *rec) +{ + struct hist_field *hist_field; + unsigned int i; + u64 hist_val; + + for_each_hist_val_field(i, hist_data) { + hist_field = hist_data->fields[i]; + hist_val = hist_field->fn(hist_field, rec); + tracing_map_update_sum(elt, i, hist_val); + } +} + +static void event_hist_trigger(struct event_trigger_data *data, void *rec) +{ + struct hist_trigger_data *hist_data = data->private_data; + struct hist_field *key_field; + struct tracing_map_elt *elt; + u64 field_contents; + void *key = NULL; + unsigned int i; + + for_each_hist_key_field(i, hist_data) { + key_field = hist_data->fields[i]; + + field_contents = key_field->fn(key_field, rec); + if (key_field->flags & HIST_FIELD_FL_STRING) + key = (void *)(unsigned long)field_contents; + else + key = (void *)&field_contents; + } + + elt = tracing_map_insert(hist_data->map, key); + if (elt) + hist_trigger_elt_update(hist_data, elt, rec); +} + +static void +hist_trigger_entry_print(struct seq_file *m, + struct hist_trigger_data *hist_data, void *key, + struct tracing_map_elt *elt) +{ + struct hist_field *key_field; + unsigned int i; + u64 uval; + + seq_puts(m, "{ "); + + for_each_hist_key_field(i, hist_data) { + key_field = hist_data->fields[i]; + + if (i > hist_data->n_vals) + seq_puts(m, ", "); + + if (key_field->flags & HIST_FIELD_FL_STRING) { + seq_printf(m, "%s: %-50s", key_field->field->name, + (char *)key); + } else { + uval = *(u64 *)key; + seq_printf(m, "%s: %10llu", + key_field->field->name, uval); + } + } + + seq_puts(m, " }"); + + seq_printf(m, " hitcount: %10llu", + tracing_map_read_sum(elt, HITCOUNT_IDX)); + + seq_puts(m, "\n"); +} + +static int print_entries(struct seq_file *m, + struct hist_trigger_data *hist_data) +{ + struct tracing_map_sort_entry **sort_entries = NULL; + struct tracing_map *map = hist_data->map; + unsigned int i, n_entries; + + n_entries = tracing_map_sort_entries(map, hist_data->sort_keys, + hist_data->n_sort_keys, + &sort_entries); + if (n_entries < 0) + return n_entries; + + for (i = 0; i < n_entries; i++) + hist_trigger_entry_print(m, hist_data, + sort_entries[i]->key, + sort_entries[i]->elt); + + tracing_map_destroy_sort_entries(sort_entries, n_entries); + + return n_entries; +} + +static int hist_show(struct seq_file *m, void *v) +{ + struct event_trigger_data *test, *data = NULL; + struct trace_event_file *event_file; + struct hist_trigger_data *hist_data; + int n_entries, ret = 0; + + mutex_lock(&event_mutex); + + event_file = event_file_data(m->private); + if (unlikely(!event_file)) { + ret = -ENODEV; + goto out_unlock; + } + + list_for_each_entry_rcu(test, &event_file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + data = test; + break; + } + } + if (!data) + goto out_unlock; + + seq_puts(m, "# event histogram\n#\n# trigger info: "); + data->ops->print(m, data->ops, data); + seq_puts(m, "\n"); + + hist_data = data->private_data; + n_entries = print_entries(m, hist_data); + if (n_entries < 0) { + ret = n_entries; + n_entries = 0; + } + + seq_printf(m, "\nTotals:\n Hits: %llu\n Entries: %u\n Dropped: %llu\n", + (u64)atomic64_read(&hist_data->map->hits), + n_entries, (u64)atomic64_read(&hist_data->map->drops)); + out_unlock: + mutex_unlock(&event_mutex); + + return ret; +} + +static int event_hist_open(struct inode *inode, struct file *file) +{ + return single_open(file, hist_show, file); +} + +const struct file_operations event_hist_fops = { + .open = event_hist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) +{ + seq_printf(m, "%s", hist_field->field->name); +} + +static int event_hist_trigger_print(struct seq_file *m, + struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + struct hist_trigger_data *hist_data = data->private_data; + struct hist_field *key_field; + unsigned int i; + + seq_puts(m, "hist:keys="); + + for_each_hist_key_field(i, hist_data) { + key_field = hist_data->fields[i]; + + if (i > hist_data->n_vals) + seq_puts(m, ","); + + hist_field_print(m, key_field); + } + + seq_puts(m, ":vals="); + seq_puts(m, "hitcount"); + + seq_puts(m, ":sort="); + seq_puts(m, "hitcount"); + + seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); + + if (data->filter_str) + seq_printf(m, " if %s", data->filter_str); + + seq_puts(m, " [active]"); + + seq_putc(m, '\n'); + + return 0; +} + +static void event_hist_trigger_free(struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + struct hist_trigger_data *hist_data = data->private_data; + + if (WARN_ON_ONCE(data->ref <= 0)) + return; + + data->ref--; + if (!data->ref) { + trigger_data_free(data); + destroy_hist_data(hist_data); + } +} + +static struct event_trigger_ops event_hist_trigger_ops = { + .func = event_hist_trigger, + .print = event_hist_trigger_print, + .init = event_trigger_init, + .free = event_hist_trigger_free, +}; + +static struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd, + char *param) +{ + return &event_hist_trigger_ops; +} + +static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file) +{ + struct event_trigger_data *test; + int ret = 0; + + list_for_each_entry_rcu(test, &file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + ret = -EEXIST; + goto out; + } + } + + if (data->ops->init) { + ret = data->ops->init(data->ops, data); + if (ret < 0) + goto out; + } + + list_add_rcu(&data->list, &file->triggers); + ret++; + + update_cond_flag(file); + if (trace_event_trigger_enable_disable(file, 1) < 0) { + list_del_rcu(&data->list); + update_cond_flag(file); + ret--; + } + out: + return ret; +} + +static int event_hist_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) +{ + unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT; + struct event_trigger_data *trigger_data; + struct hist_trigger_attrs *attrs; + struct event_trigger_ops *trigger_ops; + struct hist_trigger_data *hist_data; + char *trigger; + int ret = 0; + + if (!param) + return -EINVAL; + + /* separate the trigger from the filter (k:v [if filter]) */ + trigger = strsep(¶m, " \t"); + if (!trigger) + return -EINVAL; + + attrs = parse_hist_trigger_attrs(trigger); + if (IS_ERR(attrs)) + return PTR_ERR(attrs); + + if (attrs->map_bits) + hist_trigger_bits = attrs->map_bits; + + hist_data = create_hist_data(hist_trigger_bits, attrs, file); + if (IS_ERR(hist_data)) { + destroy_hist_trigger_attrs(attrs); + return PTR_ERR(hist_data); + } + + trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + + ret = -ENOMEM; + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + if (!trigger_data) + goto out_free; + + trigger_data->count = -1; + trigger_data->ops = trigger_ops; + trigger_data->cmd_ops = cmd_ops; + + INIT_LIST_HEAD(&trigger_data->list); + RCU_INIT_POINTER(trigger_data->filter, NULL); + + trigger_data->private_data = hist_data; + + if (glob[0] == '!') { + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + ret = 0; + goto out_free; + } + + if (!param) /* if param is non-empty, it's supposed to be a filter */ + goto out_reg; + + if (!cmd_ops->set_filter) + goto out_reg; + + ret = cmd_ops->set_filter(param, trigger_data, file); + if (ret < 0) + goto out_free; + out_reg: + ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + /* + * The above returns on success the # of triggers registered, + * but if it didn't register any it returns zero. Consider no + * triggers registered a failure too. + */ + if (!ret) { + ret = -ENOENT; + goto out_free; + } else if (ret < 0) + goto out_free; + /* Just return zero, not the number of registered triggers */ + ret = 0; + out: + return ret; + out_free: + if (cmd_ops->set_filter) + cmd_ops->set_filter(NULL, trigger_data, NULL); + + kfree(trigger_data); + + destroy_hist_data(hist_data); + goto out; +} + +static struct event_command trigger_hist_cmd = { + .name = "hist", + .trigger_type = ETT_EVENT_HIST, + .flags = EVENT_CMD_FL_NEEDS_REC, + .func = event_hist_trigger_func, + .reg = hist_register_trigger, + .unreg = unregister_trigger, + .get_trigger_ops = event_hist_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +__init int register_trigger_hist_cmd(void) +{ + int ret; + + ret = register_event_command(&trigger_hist_cmd); + WARN_ON(ret < 0); + + return ret; +} diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d67992f3bb0e..d29092afe005 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1447,6 +1447,7 @@ __init int register_trigger_cmds(void) register_trigger_snapshot_cmd(); register_trigger_stacktrace_cmd(); register_trigger_enable_disable_cmds(); + register_trigger_hist_cmd(); return 0; } -- cgit v1.2.3 From 97865fe41322d83dac4373fe0a0de5b1a1b318c5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Mar 2016 14:18:05 +0100 Subject: iio: st_sensors: verify interrupt event to status This makes all ST sensor drivers check that they actually have new data available for the requested channel(s) before claiming an IRQ, by reading the status register (which is conveniently the same for all ST sensors) and check that the channel has new data before proceeding to read it and fill the buffer. This way sensors can share an interrupt line: it can be flaged as shared and then the sensor that did not fire will return NO_IRQ, and the sensor that fired will handle the IRQ and return IRQ_HANDLED. Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 5 +++++ drivers/iio/common/st_sensors/st_sensors_buffer.c | 18 ++++++++++++++++++ drivers/iio/gyro/st_gyro_core.c | 3 +++ drivers/iio/magnetometer/st_magn_core.c | 1 + drivers/iio/pressure/st_pressure_core.c | 2 ++ include/linux/iio/common/st_sensors.h | 3 +++ 6 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index fee32e3d7a05..9fb6d35fce5b 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -332,6 +332,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int2 = ST_ACCEL_1_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_ACCEL_1_IHL_IRQ_ADDR, .mask_ihl = ST_ACCEL_1_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_ACCEL_1_MULTIREAD_BIT, .bootime = 2, @@ -397,6 +398,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int2 = ST_ACCEL_2_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_ACCEL_2_IHL_IRQ_ADDR, .mask_ihl = ST_ACCEL_2_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_ACCEL_2_MULTIREAD_BIT, .bootime = 2, @@ -474,6 +476,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int2 = ST_ACCEL_3_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_ACCEL_3_IHL_IRQ_ADDR, .mask_ihl = ST_ACCEL_3_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, .ig1 = { .en_addr = ST_ACCEL_3_IG1_EN_ADDR, .en_mask = ST_ACCEL_3_IG1_EN_MASK, @@ -532,6 +535,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .drdy_irq = { .addr = ST_ACCEL_4_DRDY_IRQ_ADDR, .mask_int1 = ST_ACCEL_4_DRDY_IRQ_INT1_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_ACCEL_4_MULTIREAD_BIT, .bootime = 2, /* guess */ @@ -583,6 +587,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int2 = ST_ACCEL_5_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_ACCEL_5_IHL_IRQ_ADDR, .mask_ihl = ST_ACCEL_5_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_ACCEL_5_MULTIREAD_BIT, .bootime = 2, /* guess */ diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index 2ce0d2a3f855..c55898543a47 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -58,6 +58,24 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct st_sensor_data *sdata = iio_priv(indio_dev); + /* If we have a status register, check if this IRQ came from us */ + if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) { + u8 status; + + len = sdata->tf->read_byte(&sdata->tb, sdata->dev, + sdata->sensor_settings->drdy_irq.addr_stat_drdy, + &status); + if (len < 0) + dev_err(sdata->dev, "could not read channel status\n"); + + /* + * If this was not caused by any channels on this sensor, + * return IRQ_NONE + */ + if (!(status & (u8)indio_dev->active_scan_mask[0])) + return IRQ_NONE; + } + len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data); if (len < 0) goto st_sensors_get_buffer_element_error; diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index 110f95b6e52f..be9057e89dc3 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -190,6 +190,7 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { * drain settings, but only for INT1 and not * for the DRDY line on INT2. */ + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_GYRO_1_MULTIREAD_BIT, .bootime = 2, @@ -258,6 +259,7 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { * drain settings, but only for INT1 and not * for the DRDY line on INT2. */ + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_GYRO_2_MULTIREAD_BIT, .bootime = 2, @@ -322,6 +324,7 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { * drain settings, but only for INT1 and not * for the DRDY line on INT2. */ + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_GYRO_3_MULTIREAD_BIT, .bootime = 2, diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 501f858df413..62036d2a9956 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -484,6 +484,7 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { .mask_int1 = ST_MAGN_3_DRDY_INT_MASK, .addr_ihl = ST_MAGN_3_IHL_IRQ_ADDR, .mask_ihl = ST_MAGN_3_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_MAGN_3_MULTIREAD_BIT, .bootime = 2, diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 172393ad34af..1cd37eaa4a57 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -226,6 +226,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .mask_int2 = ST_PRESS_LPS331AP_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_PRESS_LPS331AP_IHL_IRQ_ADDR, .mask_ihl = ST_PRESS_LPS331AP_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_PRESS_LPS331AP_MULTIREAD_BIT, .bootime = 2, @@ -312,6 +313,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .mask_int2 = ST_PRESS_LPS25H_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_PRESS_LPS25H_IHL_IRQ_ADDR, .mask_ihl = ST_PRESS_LPS25H_IHL_IRQ_MASK, + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_PRESS_LPS25H_MULTIREAD_BIT, .bootime = 2, diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 6670c3d25c58..d8da075bfda0 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -37,6 +37,7 @@ #define ST_SENSORS_DEFAULT_AXIS_ADDR 0x20 #define ST_SENSORS_DEFAULT_AXIS_MASK 0x07 #define ST_SENSORS_DEFAULT_AXIS_N_BIT 3 +#define ST_SENSORS_DEFAULT_STAT_ADDR 0x27 #define ST_SENSORS_MAX_NAME 17 #define ST_SENSORS_MAX_4WAI 7 @@ -121,6 +122,7 @@ struct st_sensor_bdu { * @mask_int2: mask to enable/disable IRQ on INT2 pin. * @addr_ihl: address to enable/disable active low on the INT lines. * @mask_ihl: mask to enable/disable active low on the INT lines. + * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. * @en_mask: mask to write the on/off value for enable. @@ -131,6 +133,7 @@ struct st_sensor_data_ready_irq { u8 mask_int2; u8 addr_ihl; u8 mask_ihl; + u8 addr_stat_drdy; struct { u8 en_addr; u8 en_mask; -- cgit v1.2.3 From 0e6f6871a1591f4bb0971809c45bc91a991f1967 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 14 Apr 2016 10:45:21 +0200 Subject: iio: st_sensors: support open drain mode Some types of ST Sensors can be connected to the same IRQ line as other peripherals using open drain. Add a device tree binding and a sensor data property to flip the right bit in the interrupt control register to enable open drain mode on the INT line. If the line is set to be open drain, also tag on IRQF_SHARED to the IRQ flags when requesting the interrupt, as the whole point of using open drain interrupt lines is to share them with more than one peripheral (wire-or). Cc: devicetree@vger.kernel.org Cc: Giuseppe Barba Cc: Denis Ciocca Acked-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/st-sensors.txt | 4 ++++ drivers/iio/accel/st_accel_core.c | 8 ++++++++ drivers/iio/common/st_sensors/st_sensors_core.c | 20 ++++++++++++++++++++ drivers/iio/common/st_sensors/st_sensors_trigger.c | 13 +++++++++++++ drivers/iio/pressure/st_pressure_core.c | 8 ++++++++ include/linux/iio/common/st_sensors.h | 6 ++++++ include/linux/platform_data/st_sensors_pdata.h | 2 ++ 7 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/iio/st-sensors.txt b/Documentation/devicetree/bindings/iio/st-sensors.txt index 71b7bdff21cd..637e283f4a8b 100644 --- a/Documentation/devicetree/bindings/iio/st-sensors.txt +++ b/Documentation/devicetree/bindings/iio/st-sensors.txt @@ -16,6 +16,10 @@ Optional properties: - st,drdy-int-pin: the pin on the package that will be used to signal "data ready" (valid values: 1 or 2). This property is not configurable on all sensors. +- drive-open-drain: the interrupt/data ready line will be configured + as open drain, which is useful if several sensors share the same + interrupt line. (This binding is taken from pinctrl/pinctrl-bindings.txt) + This is a boolean property. Sensors may also have applicable pin control settings, those use the standard bindings from pinctrl/pinctrl-bindings.txt. diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 9fb6d35fce5b..dc73f2d85e6d 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -99,6 +99,8 @@ #define ST_ACCEL_2_DRDY_IRQ_INT2_MASK 0x10 #define ST_ACCEL_2_IHL_IRQ_ADDR 0x22 #define ST_ACCEL_2_IHL_IRQ_MASK 0x80 +#define ST_ACCEL_2_OD_IRQ_ADDR 0x22 +#define ST_ACCEL_2_OD_IRQ_MASK 0x40 #define ST_ACCEL_2_MULTIREAD_BIT true /* CUSTOM VALUES FOR SENSOR 3 */ @@ -180,6 +182,8 @@ #define ST_ACCEL_5_DRDY_IRQ_INT2_MASK 0x20 #define ST_ACCEL_5_IHL_IRQ_ADDR 0x22 #define ST_ACCEL_5_IHL_IRQ_MASK 0x80 +#define ST_ACCEL_5_OD_IRQ_ADDR 0x22 +#define ST_ACCEL_5_OD_IRQ_MASK 0x40 #define ST_ACCEL_5_IG1_EN_ADDR 0x21 #define ST_ACCEL_5_IG1_EN_MASK 0x08 #define ST_ACCEL_5_MULTIREAD_BIT false @@ -398,6 +402,8 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int2 = ST_ACCEL_2_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_ACCEL_2_IHL_IRQ_ADDR, .mask_ihl = ST_ACCEL_2_IHL_IRQ_MASK, + .addr_od = ST_ACCEL_2_OD_IRQ_ADDR, + .mask_od = ST_ACCEL_2_OD_IRQ_MASK, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_ACCEL_2_MULTIREAD_BIT, @@ -587,6 +593,8 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int2 = ST_ACCEL_5_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_ACCEL_5_IHL_IRQ_ADDR, .mask_ihl = ST_ACCEL_5_IHL_IRQ_MASK, + .addr_od = ST_ACCEL_5_OD_IRQ_ADDR, + .mask_od = ST_ACCEL_5_OD_IRQ_MASK, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_ACCEL_5_MULTIREAD_BIT, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index f5a2d445d0c0..dffe00692169 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -301,6 +301,14 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev, return -EINVAL; } + if (pdata->open_drain) { + if (!sdata->sensor_settings->drdy_irq.addr_od) + dev_err(&indio_dev->dev, + "open drain requested but unsupported.\n"); + else + sdata->int_pin_open_drain = true; + } + return 0; } @@ -321,6 +329,8 @@ static struct st_sensors_platform_data *st_sensors_of_probe(struct device *dev, else pdata->drdy_int_pin = defdata ? defdata->drdy_int_pin : 0; + pdata->open_drain = of_property_read_bool(np, "drive-open-drain"); + return pdata; } #else @@ -374,6 +384,16 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, return err; } + if (sdata->int_pin_open_drain) { + dev_info(&indio_dev->dev, + "set interrupt line to open drain mode\n"); + err = st_sensors_write_data_with_mask(indio_dev, + sdata->sensor_settings->drdy_irq.addr_od, + sdata->sensor_settings->drdy_irq.mask_od, 1); + if (err < 0) + return err; + } + err = st_sensors_set_axis_enable(indio_dev, ST_SENSORS_ENABLE_ALL_AXIS); return err; diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 6a8c98327945..da72279fcf99 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -64,6 +64,19 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, "rising edge\n", irq_trig); irq_trig = IRQF_TRIGGER_RISING; } + + /* + * If the interrupt pin is Open Drain, by definition this + * means that the interrupt line may be shared with other + * peripherals. But to do this we also need to have a status + * register and mask to figure out if this sensor was firing + * the IRQ or not, so we can tell the interrupt handle that + * it was "our" interrupt. + */ + if (sdata->int_pin_open_drain && + sdata->sensor_settings->drdy_irq.addr_stat_drdy) + irq_trig |= IRQF_SHARED; + err = request_threaded_irq(irq, iio_trigger_generic_data_rdy_poll, NULL, diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 1cd37eaa4a57..9e9b72a8f18f 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -64,6 +64,8 @@ #define ST_PRESS_LPS331AP_DRDY_IRQ_INT2_MASK 0x20 #define ST_PRESS_LPS331AP_IHL_IRQ_ADDR 0x22 #define ST_PRESS_LPS331AP_IHL_IRQ_MASK 0x80 +#define ST_PRESS_LPS331AP_OD_IRQ_ADDR 0x22 +#define ST_PRESS_LPS331AP_OD_IRQ_MASK 0x40 #define ST_PRESS_LPS331AP_MULTIREAD_BIT true #define ST_PRESS_LPS331AP_TEMP_OFFSET 42500 @@ -104,6 +106,8 @@ #define ST_PRESS_LPS25H_DRDY_IRQ_INT2_MASK 0x10 #define ST_PRESS_LPS25H_IHL_IRQ_ADDR 0x22 #define ST_PRESS_LPS25H_IHL_IRQ_MASK 0x80 +#define ST_PRESS_LPS25H_OD_IRQ_ADDR 0x22 +#define ST_PRESS_LPS25H_OD_IRQ_MASK 0x40 #define ST_PRESS_LPS25H_MULTIREAD_BIT true #define ST_PRESS_LPS25H_TEMP_OFFSET 42500 #define ST_PRESS_LPS25H_OUT_XL_ADDR 0x28 @@ -226,6 +230,8 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .mask_int2 = ST_PRESS_LPS331AP_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_PRESS_LPS331AP_IHL_IRQ_ADDR, .mask_ihl = ST_PRESS_LPS331AP_IHL_IRQ_MASK, + .addr_od = ST_PRESS_LPS331AP_OD_IRQ_ADDR, + .mask_od = ST_PRESS_LPS331AP_OD_IRQ_MASK, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_PRESS_LPS331AP_MULTIREAD_BIT, @@ -313,6 +319,8 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .mask_int2 = ST_PRESS_LPS25H_DRDY_IRQ_INT2_MASK, .addr_ihl = ST_PRESS_LPS25H_IHL_IRQ_ADDR, .mask_ihl = ST_PRESS_LPS25H_IHL_IRQ_MASK, + .addr_od = ST_PRESS_LPS25H_OD_IRQ_ADDR, + .mask_od = ST_PRESS_LPS25H_OD_IRQ_MASK, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = ST_PRESS_LPS25H_MULTIREAD_BIT, diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index d8da075bfda0..d029ffac0d69 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -122,6 +122,8 @@ struct st_sensor_bdu { * @mask_int2: mask to enable/disable IRQ on INT2 pin. * @addr_ihl: address to enable/disable active low on the INT lines. * @mask_ihl: mask to enable/disable active low on the INT lines. + * @addr_od: address to enable/disable Open Drain on the INT lines. + * @mask_od: mask to enable/disable Open Drain on the INT lines. * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. @@ -133,6 +135,8 @@ struct st_sensor_data_ready_irq { u8 mask_int2; u8 addr_ihl; u8 mask_ihl; + u8 addr_od; + u8 mask_od; u8 addr_stat_drdy; struct { u8 en_addr; @@ -215,6 +219,7 @@ struct st_sensor_settings { * @odr: Output data rate of the sensor [Hz]. * num_data_channels: Number of data channels used in buffer. * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2). + * @int_pin_open_drain: Set the interrupt/DRDY to open drain. * @get_irq_data_ready: Function to get the IRQ used for data ready signal. * @tf: Transfer function structure used by I/O operations. * @tb: Transfer buffers and mutex used by I/O operations. @@ -236,6 +241,7 @@ struct st_sensor_data { unsigned int num_data_channels; u8 drdy_int_pin; + bool int_pin_open_drain; unsigned int (*get_irq_data_ready) (struct iio_dev *indio_dev); diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 753839187ba0..79b0e4cdb814 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -16,9 +16,11 @@ * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2). * Available only for accelerometer and pressure sensors. * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). + * @open_drain: set the interrupt line to be open drain if possible. */ struct st_sensors_platform_data { u8 drdy_int_pin; + bool open_drain; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From 8bf872d8d261feefcdf67027522e3f717cad2bfe Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 6 Apr 2016 16:01:06 +0530 Subject: iio: core: Add devm_ APIs for iio_channel_{get,release} Some of kernel driver uses the IIO framework to get the sensor value via ADC or IIO HW driver. The client driver get iio channel by iio_channel_get() and release it by calling iio_channel_release(). Add resource managed version (devm_*) of these APIs so that if client calls the devm_iio_channel_get() then it need not to release it explicitly, it can be done by managed device framework when driver get un-binded. This reduces the code in error path and also need of .remove callback in some cases. Signed-off-by: Laxman Dewangan Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/iio/consumer.h | 27 +++++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 2fc7928f401d..9fd8934c1887 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -356,6 +356,54 @@ void iio_channel_release(struct iio_channel *channel) } EXPORT_SYMBOL_GPL(iio_channel_release); +static void devm_iio_channel_free(struct device *dev, void *res) +{ + struct iio_channel *channel = *(struct iio_channel **)res; + + iio_channel_release(channel); +} + +static int devm_iio_channel_match(struct device *dev, void *res, void *data) +{ + struct iio_channel **r = res; + + if (!r || !*r) { + WARN_ON(!r || !*r); + return 0; + } + + return *r == data; +} + +struct iio_channel *devm_iio_channel_get(struct device *dev, + const char *channel_name) +{ + struct iio_channel **ptr, *channel; + + ptr = devres_alloc(devm_iio_channel_free, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + channel = iio_channel_get(dev, channel_name); + if (IS_ERR(channel)) { + devres_free(ptr); + return channel; + } + + *ptr = channel; + devres_add(dev, ptr); + + return channel; +} +EXPORT_SYMBOL_GPL(devm_iio_channel_get); + +void devm_iio_channel_release(struct device *dev, struct iio_channel *channel) +{ + WARN_ON(devres_release(dev, devm_iio_channel_free, + devm_iio_channel_match, channel)); +} +EXPORT_SYMBOL_GPL(devm_iio_channel_release); + struct iio_channel *iio_channel_get_all(struct device *dev) { const char *name; diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index fad58671c49e..e1e033d6a81f 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -48,6 +48,33 @@ struct iio_channel *iio_channel_get(struct device *dev, */ void iio_channel_release(struct iio_channel *chan); +/** + * devm_iio_channel_get() - Resource managed version of iio_channel_get(). + * @dev: Pointer to consumer device. Device name must match + * the name of the device as provided in the iio_map + * with which the desired provider to consumer mapping + * was registered. + * @consumer_channel: Unique name to identify the channel on the consumer + * side. This typically describes the channels use within + * the consumer. E.g. 'battery_voltage' + * + * Returns a pointer to negative errno if it is not able to get the iio channel + * otherwise returns valid pointer for iio channel. + * + * The allocated iio channel is automatically released when the device is + * unbound. + */ +struct iio_channel *devm_iio_channel_get(struct device *dev, + const char *consumer_channel); +/** + * devm_iio_channel_release() - Resource managed version of + * iio_channel_release(). + * @dev: Pointer to consumer device for which resource + * is allocared. + * @chan: The channel to be released. + */ +void devm_iio_channel_release(struct device *dev, struct iio_channel *chan); + /** * iio_channel_get_all() - get all channels associated with a client * @dev: Pointer to consumer device. -- cgit v1.2.3 From efc2c0133f198bc65593a67015af358919b0c48f Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 6 Apr 2016 16:01:07 +0530 Subject: iio: core: Add devm_ APIs for iio_channel_{get,release}_all Some of kernel driver uses the IIO framework to get the sensor value via ADC or IIO HW driver. The client driver get iio channel by iio_channel_get_all() and release it by calling iio_channel_release_all(). Add resource managed version (devm_*) of these APIs so that if client calls the devm_iio_channel_get_all() then it need not to release it explicitly, it can be done by managed device framework when driver get un-binded. This reduces the code in error path and also need of .remove callback in some cases. Signed-off-by: Laxman Dewangan Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/iio/consumer.h | 26 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 9fd8934c1887..c4757e6367e7 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -489,6 +489,42 @@ void iio_channel_release_all(struct iio_channel *channels) } EXPORT_SYMBOL_GPL(iio_channel_release_all); +static void devm_iio_channel_free_all(struct device *dev, void *res) +{ + struct iio_channel *channels = *(struct iio_channel **)res; + + iio_channel_release_all(channels); +} + +struct iio_channel *devm_iio_channel_get_all(struct device *dev) +{ + struct iio_channel **ptr, *channels; + + ptr = devres_alloc(devm_iio_channel_free_all, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + channels = iio_channel_get_all(dev); + if (IS_ERR(channels)) { + devres_free(ptr); + return channels; + } + + *ptr = channels; + devres_add(dev, ptr); + + return channels; +} +EXPORT_SYMBOL_GPL(devm_iio_channel_get_all); + +void devm_iio_channel_release_all(struct device *dev, + struct iio_channel *channels) +{ + WARN_ON(devres_release(dev, devm_iio_channel_free_all, + devm_iio_channel_match, channels)); +} +EXPORT_SYMBOL_GPL(devm_iio_channel_release_all); + static int iio_channel_read(struct iio_channel *chan, int *val, int *val2, enum iio_chan_info_enum info) { diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index e1e033d6a81f..3d672f72e7ec 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -92,6 +92,32 @@ struct iio_channel *iio_channel_get_all(struct device *dev); */ void iio_channel_release_all(struct iio_channel *chan); +/** + * devm_iio_channel_get_all() - Resource managed version of + * iio_channel_get_all(). + * @dev: Pointer to consumer device. + * + * Returns a pointer to negative errno if it is not able to get the iio channel + * otherwise returns an array of iio_channel structures terminated with one with + * null iio_dev pointer. + * + * This function is used by fairly generic consumers to get all the + * channels registered as having this consumer. + * + * The allocated iio channels are automatically released when the device is + * unbounded. + */ +struct iio_channel *devm_iio_channel_get_all(struct device *dev); + +/** + * devm_iio_channel_release_all() - Resource managed version of + * iio_channel_release_all(). + * @dev: Pointer to consumer device for which resource + * is allocared. + * @chan: Array channel to be released. + */ +void devm_iio_channel_release_all(struct device *dev, struct iio_channel *chan); + struct iio_cb_buffer; /** * iio_channel_get_all_cb() - register callback for triggered capture -- cgit v1.2.3 From b0fcd8ab7b3c89b5da7fff5224d06ed73e7a33cc Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 23 Mar 2016 11:19:00 +0100 Subject: mtd: nand: add new enum for storing ECC algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our nand_ecc_modes_t is already a bit abused by value NAND_ECC_SOFT_BCH. This enum should store ECC mode only and putting algorithm details there is a bad idea. It would result in too many values impossible to support in a sane way. To solve this problem let's add a new enum. We'll have to modify all drivers to set it properly but once it's done it'll be possible to drop NAND_ECC_SOFT_BCH. That will result in a cleaner design and more possibilities like setting ECC algorithm for hardware ECC mode. Signed-off-by: Rafał Miłecki Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 56574ba36555..1b673e19667c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -119,6 +119,12 @@ typedef enum { NAND_ECC_SOFT_BCH, } nand_ecc_modes_t; +enum nand_ecc_algo { + NAND_ECC_UNKNOWN, + NAND_ECC_HAMMING, + NAND_ECC_BCH, +}; + /* * Constants for Hardware ECC */ @@ -458,6 +464,7 @@ struct nand_hw_control { /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode + * @algo: ECC algorithm * @steps: number of ECC steps per page * @size: data bytes per ECC step * @bytes: ECC bytes per step @@ -508,6 +515,7 @@ struct nand_hw_control { */ struct nand_ecc_ctrl { nand_ecc_modes_t mode; + enum nand_ecc_algo algo; int steps; int size; int bytes; -- cgit v1.2.3 From dd2dcc004230b9d8fa809102cd326e3ee4bbdb2a Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 23 Mar 2016 11:19:01 +0100 Subject: of: mtd: prepare helper reading NAND ECC algo from DT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NAND subsystem is being slightly reworked to store ECC details in separated fields. In future we'll want to add support for more DT properties as specifying every possible setup with a single "nand-ecc-mode" is a pretty bad idea. To allow this let's add a helper that will support something like "nand-ecc-algo" in future. Right now we use it for keeping backward compatibility. Signed-off-by: Rafał Miłecki Signed-off-by: Boris Brezillon --- drivers/of/of_mtd.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/of_mtd.h | 6 ++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/of_mtd.c b/drivers/of/of_mtd.c index b7361ed70537..15d056e181d2 100644 --- a/drivers/of/of_mtd.c +++ b/drivers/of/of_mtd.c @@ -49,6 +49,42 @@ int of_get_nand_ecc_mode(struct device_node *np) } EXPORT_SYMBOL_GPL(of_get_nand_ecc_mode); +/** + * of_get_nand_ecc_algo - Get nand ecc algorithm for given device_node + * @np: Pointer to the given device_node + * + * The function gets ecc algorithm and returns its enum value, or errno in error + * case. + */ +int of_get_nand_ecc_algo(struct device_node *np) +{ + const char *pm; + int err; + + /* + * TODO: Read ECC algo OF property and map it to enum nand_ecc_algo. + * It's not implemented yet as currently NAND subsystem ignores + * algorithm explicitly set this way. Once it's handled we should + * document & support new property. + */ + + /* + * For backward compatibility we also read "nand-ecc-mode" checking + * for some obsoleted values that were specifying ECC algorithm. + */ + err = of_property_read_string(np, "nand-ecc-mode", &pm); + if (err < 0) + return err; + + if (!strcasecmp(pm, "soft")) + return NAND_ECC_HAMMING; + else if (!strcasecmp(pm, "soft_bch")) + return NAND_ECC_BCH; + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(of_get_nand_ecc_algo); + /** * of_get_nand_ecc_step_size - Get ECC step size associated to * the required ECC strength (see below). diff --git a/include/linux/of_mtd.h b/include/linux/of_mtd.h index e266caa36402..0f6aca5c6f2f 100644 --- a/include/linux/of_mtd.h +++ b/include/linux/of_mtd.h @@ -13,6 +13,7 @@ #include int of_get_nand_ecc_mode(struct device_node *np); +int of_get_nand_ecc_algo(struct device_node *np); int of_get_nand_ecc_step_size(struct device_node *np); int of_get_nand_ecc_strength(struct device_node *np); int of_get_nand_bus_width(struct device_node *np); @@ -25,6 +26,11 @@ static inline int of_get_nand_ecc_mode(struct device_node *np) return -ENOSYS; } +static inline int of_get_nand_ecc_algo(struct device_node *np) +{ + return -ENOSYS; +} + static inline int of_get_nand_ecc_step_size(struct device_node *np) { return -ENOSYS; -- cgit v1.2.3 From 7a654172161c8c9c7d59cbd0054d9e63c7411219 Mon Sep 17 00:00:00 2001 From: Raghav Dogra Date: Wed, 17 Feb 2016 16:54:18 +0530 Subject: mtd/ifc: Add support for IFC controller version 2.0 The new IFC controller version 2.0 has a different memory map page. Upto IFC 1.4 PAGE size is 4 KB and from IFC2.0 PAGE size is 64KB. This patch segregates the IFC global and runtime registers to appropriate PAGE sizes. Signed-off-by: Jaiprakash Singh Signed-off-by: Raghav Dogra Acked-by: Li Yang Signed-off-by: Raghav Dogra Acked-by: Scott Wood Acked-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/memory/fsl_ifc.c | 36 ++++++++++----------- drivers/mtd/nand/fsl_ifc_nand.c | 72 ++++++++++++++++++++++------------------- include/linux/fsl_ifc.h | 45 +++++++++++++++++--------- 3 files changed, 87 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 2a691da8c1c7..904b4af5f142 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -59,11 +59,11 @@ int fsl_ifc_find(phys_addr_t addr_base) { int i = 0; - if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs) + if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->gregs) return -ENODEV; for (i = 0; i < fsl_ifc_ctrl_dev->banks; i++) { - u32 cspr = ifc_in32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr); + u32 cspr = ifc_in32(&fsl_ifc_ctrl_dev->gregs->cspr_cs[i].cspr); if (cspr & CSPR_V && (cspr & CSPR_BA) == convert_ifc_address(addr_base)) return i; @@ -75,7 +75,7 @@ EXPORT_SYMBOL(fsl_ifc_find); static int fsl_ifc_ctrl_init(struct fsl_ifc_ctrl *ctrl) { - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_global __iomem *ifc = ctrl->gregs; /* * Clear all the common status and event registers @@ -104,7 +104,7 @@ static int fsl_ifc_ctrl_remove(struct platform_device *dev) irq_dispose_mapping(ctrl->nand_irq); irq_dispose_mapping(ctrl->irq); - iounmap(ctrl->regs); + iounmap(ctrl->gregs); dev_set_drvdata(&dev->dev, NULL); kfree(ctrl); @@ -122,7 +122,7 @@ static DEFINE_SPINLOCK(nand_irq_lock); static u32 check_nand_stat(struct fsl_ifc_ctrl *ctrl) { - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; unsigned long flags; u32 stat; @@ -157,7 +157,7 @@ static irqreturn_t fsl_ifc_nand_irq(int irqno, void *data) static irqreturn_t fsl_ifc_ctrl_irq(int irqno, void *data) { struct fsl_ifc_ctrl *ctrl = data; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_global __iomem *ifc = ctrl->gregs; u32 err_axiid, err_srcid, status, cs_err, err_addr; irqreturn_t ret = IRQ_NONE; @@ -215,6 +215,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) { int ret = 0; int version, banks; + void __iomem *addr; dev_info(&dev->dev, "Freescale Integrated Flash Controller\n"); @@ -225,22 +226,13 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) dev_set_drvdata(&dev->dev, fsl_ifc_ctrl_dev); /* IOMAP the entire IFC region */ - fsl_ifc_ctrl_dev->regs = of_iomap(dev->dev.of_node, 0); - if (!fsl_ifc_ctrl_dev->regs) { + fsl_ifc_ctrl_dev->gregs = of_iomap(dev->dev.of_node, 0); + if (!fsl_ifc_ctrl_dev->gregs) { dev_err(&dev->dev, "failed to get memory region\n"); ret = -ENODEV; goto err; } - version = ifc_in32(&fsl_ifc_ctrl_dev->regs->ifc_rev) & - FSL_IFC_VERSION_MASK; - banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8; - dev_info(&dev->dev, "IFC version %d.%d, %d banks\n", - version >> 24, (version >> 16) & 0xf, banks); - - fsl_ifc_ctrl_dev->version = version; - fsl_ifc_ctrl_dev->banks = banks; - if (of_property_read_bool(dev->dev.of_node, "little-endian")) { fsl_ifc_ctrl_dev->little_endian = true; dev_dbg(&dev->dev, "IFC REGISTERS are LITTLE endian\n"); @@ -249,8 +241,9 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) dev_dbg(&dev->dev, "IFC REGISTERS are BIG endian\n"); } - version = ioread32be(&fsl_ifc_ctrl_dev->regs->ifc_rev) & + version = ifc_in32(&fsl_ifc_ctrl_dev->gregs->ifc_rev) & FSL_IFC_VERSION_MASK; + banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8; dev_info(&dev->dev, "IFC version %d.%d, %d banks\n", version >> 24, (version >> 16) & 0xf, banks); @@ -258,6 +251,13 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) fsl_ifc_ctrl_dev->version = version; fsl_ifc_ctrl_dev->banks = banks; + addr = fsl_ifc_ctrl_dev->gregs; + if (version >= FSL_IFC_VERSION_2_0_0) + addr += PGOFFSET_64K; + else + addr += PGOFFSET_4K; + fsl_ifc_ctrl_dev->rregs = addr; + /* get the Controller level irq */ fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0); if (fsl_ifc_ctrl_dev->irq == 0) { diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 43f5a3a4873f..f8a016f038cd 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -232,7 +232,7 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob) struct nand_chip *chip = mtd_to_nand(mtd); struct fsl_ifc_mtd *priv = nand_get_controller_data(chip); struct fsl_ifc_ctrl *ctrl = priv->ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; int buf_num; ifc_nand_ctrl->page = page_addr; @@ -295,7 +295,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) struct fsl_ifc_mtd *priv = nand_get_controller_data(chip); struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; u32 eccstat[4]; int i; @@ -371,7 +371,7 @@ static void fsl_ifc_do_read(struct nand_chip *chip, { struct fsl_ifc_mtd *priv = nand_get_controller_data(chip); struct fsl_ifc_ctrl *ctrl = priv->ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; /* Program FIR/IFC_NAND_FCR0 for Small/Large page */ if (mtd->writesize > 512) { @@ -411,7 +411,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, struct nand_chip *chip = mtd_to_nand(mtd); struct fsl_ifc_mtd *priv = nand_get_controller_data(chip); struct fsl_ifc_ctrl *ctrl = priv->ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; /* clear the read buffer */ ifc_nand_ctrl->read_bytes = 0; @@ -723,7 +723,7 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) { struct fsl_ifc_mtd *priv = nand_get_controller_data(chip); struct fsl_ifc_ctrl *ctrl = priv->ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; u32 nand_fsr; /* Use READ_STATUS command, but wait for the device to be ready */ @@ -825,39 +825,42 @@ static int fsl_ifc_chip_init_tail(struct mtd_info *mtd) static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) { struct fsl_ifc_ctrl *ctrl = priv->ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_runtime __iomem *ifc_runtime = ctrl->rregs; + struct fsl_ifc_global __iomem *ifc_global = ctrl->gregs; uint32_t csor = 0, csor_8k = 0, csor_ext = 0; uint32_t cs = priv->bank; /* Save CSOR and CSOR_ext */ - csor = ifc_in32(&ifc->csor_cs[cs].csor); - csor_ext = ifc_in32(&ifc->csor_cs[cs].csor_ext); + csor = ifc_in32(&ifc_global->csor_cs[cs].csor); + csor_ext = ifc_in32(&ifc_global->csor_cs[cs].csor_ext); /* chage PageSize 8K and SpareSize 1K*/ csor_8k = (csor & ~(CSOR_NAND_PGS_MASK)) | 0x0018C000; - ifc_out32(csor_8k, &ifc->csor_cs[cs].csor); - ifc_out32(0x0000400, &ifc->csor_cs[cs].csor_ext); + ifc_out32(csor_8k, &ifc_global->csor_cs[cs].csor); + ifc_out32(0x0000400, &ifc_global->csor_cs[cs].csor_ext); /* READID */ ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT), - &ifc->ifc_nand.nand_fir0); + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT), + &ifc_runtime->ifc_nand.nand_fir0); ifc_out32(NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); - ifc_out32(0x0, &ifc->ifc_nand.row3); + &ifc_runtime->ifc_nand.nand_fcr0); + ifc_out32(0x0, &ifc_runtime->ifc_nand.row3); - ifc_out32(0x0, &ifc->ifc_nand.nand_fbcr); + ifc_out32(0x0, &ifc_runtime->ifc_nand.nand_fbcr); /* Program ROW0/COL0 */ - ifc_out32(0x0, &ifc->ifc_nand.row0); - ifc_out32(0x0, &ifc->ifc_nand.col0); + ifc_out32(0x0, &ifc_runtime->ifc_nand.row0); + ifc_out32(0x0, &ifc_runtime->ifc_nand.col0); /* set the chip select for NAND Transaction */ - ifc_out32(cs << IFC_NAND_CSEL_SHIFT, &ifc->ifc_nand.nand_csel); + ifc_out32(cs << IFC_NAND_CSEL_SHIFT, + &ifc_runtime->ifc_nand.nand_csel); /* start read seq */ - ifc_out32(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); + ifc_out32(IFC_NAND_SEQ_STRT_FIR_STRT, + &ifc_runtime->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -867,14 +870,15 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n"); /* Restore CSOR and CSOR_ext */ - ifc_out32(csor, &ifc->csor_cs[cs].csor); - ifc_out32(csor_ext, &ifc->csor_cs[cs].csor_ext); + ifc_out32(csor, &ifc_global->csor_cs[cs].csor); + ifc_out32(csor_ext, &ifc_global->csor_cs[cs].csor_ext); } static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) { struct fsl_ifc_ctrl *ctrl = priv->ctrl; - struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + struct fsl_ifc_global __iomem *ifc_global = ctrl->gregs; + struct fsl_ifc_runtime __iomem *ifc_runtime = ctrl->rregs; struct nand_chip *chip = &priv->chip; struct mtd_info *mtd = nand_to_mtd(&priv->chip); struct nand_ecclayout *layout; @@ -886,7 +890,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) /* fill in nand_chip structure */ /* set up function call table */ - if ((ifc_in32(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) + if ((ifc_in32(&ifc_global->cspr_cs[priv->bank].cspr)) + & CSPR_PORT_SIZE_16) chip->read_byte = fsl_ifc_read_byte16; else chip->read_byte = fsl_ifc_read_byte; @@ -900,13 +905,14 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->bbt_td = &bbt_main_descr; chip->bbt_md = &bbt_mirror_descr; - ifc_out32(0x0, &ifc->ifc_nand.ncfgr); + ifc_out32(0x0, &ifc_runtime->ifc_nand.ncfgr); /* set up nand options */ chip->bbt_options = NAND_BBT_USE_FLASH; chip->options = NAND_NO_SUBPAGE_WRITE; - if (ifc_in32(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { + if (ifc_in32(&ifc_global->cspr_cs[priv->bank].cspr) + & CSPR_PORT_SIZE_16) { chip->read_byte = fsl_ifc_read_byte16; chip->options |= NAND_BUSWIDTH_16; } else { @@ -919,7 +925,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.read_page = fsl_ifc_read_page; chip->ecc.write_page = fsl_ifc_write_page; - csor = ifc_in32(&ifc->csor_cs[priv->bank].csor); + csor = ifc_in32(&ifc_global->csor_cs[priv->bank].csor); /* Hardware generates ECC per 512 Bytes */ chip->ecc.size = 512; @@ -1007,10 +1013,10 @@ static int fsl_ifc_chip_remove(struct fsl_ifc_mtd *priv) return 0; } -static int match_bank(struct fsl_ifc_regs __iomem *ifc, int bank, +static int match_bank(struct fsl_ifc_global __iomem *ifc_global, int bank, phys_addr_t addr) { - u32 cspr = ifc_in32(&ifc->cspr_cs[bank].cspr); + u32 cspr = ifc_in32(&ifc_global->cspr_cs[bank].cspr); if (!(cspr & CSPR_V)) return 0; @@ -1024,7 +1030,7 @@ static DEFINE_MUTEX(fsl_ifc_nand_mutex); static int fsl_ifc_nand_probe(struct platform_device *dev) { - struct fsl_ifc_regs __iomem *ifc; + struct fsl_ifc_runtime __iomem *ifc; struct fsl_ifc_mtd *priv; struct resource res; static const char *part_probe_types[] @@ -1034,9 +1040,9 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) struct device_node *node = dev->dev.of_node; struct mtd_info *mtd; - if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs) + if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->rregs) return -ENODEV; - ifc = fsl_ifc_ctrl_dev->regs; + ifc = fsl_ifc_ctrl_dev->rregs; /* get, allocate and map the memory resource */ ret = of_address_to_resource(node, 0, &res); @@ -1047,7 +1053,7 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) /* find which chip select it is connected to */ for (bank = 0; bank < fsl_ifc_ctrl_dev->banks; bank++) { - if (match_bank(ifc, bank, res.start)) + if (match_bank(fsl_ifc_ctrl_dev->gregs, bank, res.start)) break; } diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 0023088b253b..3f9778cbc79d 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -39,6 +39,10 @@ #define FSL_IFC_VERSION_MASK 0x0F0F0000 #define FSL_IFC_VERSION_1_0_0 0x01000000 #define FSL_IFC_VERSION_1_1_0 0x01010000 +#define FSL_IFC_VERSION_2_0_0 0x02000000 + +#define PGOFFSET_64K (64*1024) +#define PGOFFSET_4K (4*1024) /* * CSPR - Chip Select Property Register @@ -723,20 +727,26 @@ struct fsl_ifc_nand { __be32 nand_evter_en; u32 res17[0x2]; __be32 nand_evter_intr_en; - u32 res18[0x2]; + __be32 nand_vol_addr_stat; + u32 res18; __be32 nand_erattr0; __be32 nand_erattr1; u32 res19[0x10]; __be32 nand_fsr; - u32 res20; - __be32 nand_eccstat[4]; - u32 res21[0x20]; + u32 res20[0x3]; + __be32 nand_eccstat[6]; + u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; __be32 nand_autoboot_trgr; u32 res23; __be32 nand_mdr; - u32 res24[0x5C]; + u32 res24[0x1C]; + __be32 nand_dll_lowcfg0; + __be32 nand_dll_lowcfg1; + u32 res25; + __be32 nand_dll_lowstat; + u32 res26[0x3c]; }; /* @@ -771,13 +781,12 @@ struct fsl_ifc_gpcm { __be32 gpcm_erattr1; __be32 gpcm_erattr2; __be32 gpcm_stat; - u32 res4[0x1F3]; }; /* * IFC Controller Registers */ -struct fsl_ifc_regs { +struct fsl_ifc_global { __be32 ifc_rev; u32 res1[0x2]; struct { @@ -803,21 +812,26 @@ struct fsl_ifc_regs { } ftim_cs[FSL_IFC_BANK_COUNT]; u32 res9[0x30]; __be32 rb_stat; - u32 res10[0x2]; + __be32 rb_map; + __be32 wb_map; __be32 ifc_gcr; - u32 res11[0x2]; + u32 res10[0x2]; __be32 cm_evter_stat; - u32 res12[0x2]; + u32 res11[0x2]; __be32 cm_evter_en; - u32 res13[0x2]; + u32 res12[0x2]; __be32 cm_evter_intr_en; - u32 res14[0x2]; + u32 res13[0x2]; __be32 cm_erattr0; __be32 cm_erattr1; - u32 res15[0x2]; + u32 res14[0x2]; __be32 ifc_ccr; __be32 ifc_csr; - u32 res16[0x2EB]; + __be32 ddr_ccr_low; +}; + + +struct fsl_ifc_runtime { struct fsl_ifc_nand ifc_nand; struct fsl_ifc_nor ifc_nor; struct fsl_ifc_gpcm ifc_gpcm; @@ -831,7 +845,8 @@ extern int fsl_ifc_find(phys_addr_t addr_base); struct fsl_ifc_ctrl { /* device info */ struct device *dev; - struct fsl_ifc_regs __iomem *regs; + struct fsl_ifc_global __iomem *gregs; + struct fsl_ifc_runtime __iomem *rregs; int irq; int nand_irq; spinlock_t lock; -- cgit v1.2.3 From 9d02fc2a5129449581c3108c260e96377cf35f7e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 26 Aug 2015 16:08:12 +0200 Subject: mtd: nand: export default read/write oob functions Export the default read/write oob functions (for the standard and syndrome scheme), so that drivers can use them for their raw implementation and implement their own functions for the normal oob operation. This is required if your ECC engine is capable of fixing some of the OOB data. In this case you have to overload the ->read_oob() and ->write_oob(), but if you don't specify the ->read/write_oob_raw() functions they are assigned to the ->read/write_oob() implementation, which is not what you want. Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 18 ++++++++++-------- include/linux/mtd/nand.h | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 0f0c5b190316..13fcddc8a10e 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1893,13 +1893,13 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len, * @chip: nand chip info structure * @page: page number to read */ -static int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, - int page) +int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); return 0; } +EXPORT_SYMBOL(nand_read_oob_std); /** * nand_read_oob_syndrome - [REPLACEABLE] OOB data read function for HW ECC @@ -1908,8 +1908,8 @@ static int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, * @chip: nand chip info structure * @page: page number to read */ -static int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, - int page) +int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page) { int length = mtd->oobsize; int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad; @@ -1937,6 +1937,7 @@ static int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, return 0; } +EXPORT_SYMBOL(nand_read_oob_syndrome); /** * nand_write_oob_std - [REPLACEABLE] the most common OOB data write function @@ -1944,8 +1945,7 @@ static int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, * @chip: nand chip info structure * @page: page number to write */ -static int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, - int page) +int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { int status = 0; const uint8_t *buf = chip->oob_poi; @@ -1960,6 +1960,7 @@ static int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, return status & NAND_STATUS_FAIL ? -EIO : 0; } +EXPORT_SYMBOL(nand_write_oob_std); /** * nand_write_oob_syndrome - [REPLACEABLE] OOB data write function for HW ECC @@ -1968,8 +1969,8 @@ static int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, * @chip: nand chip info structure * @page: page number to write */ -static int nand_write_oob_syndrome(struct mtd_info *mtd, - struct nand_chip *chip, int page) +int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page) { int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad; int eccsize = chip->ecc.size, length = mtd->oobsize; @@ -2019,6 +2020,7 @@ static int nand_write_oob_syndrome(struct mtd_info *mtd, return status & NAND_STATUS_FAIL ? -EIO : 0; } +EXPORT_SYMBOL(nand_write_oob_syndrome); /** * nand_do_read_oob - [INTERN] NAND read out-of-band diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1b673e19667c..7e06afb8552c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1078,4 +1078,18 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, void *extraoob, int extraooblen, int threshold); + +/* Default write_oob implementation */ +int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Default write_oob syndrome implementation */ +int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page); + +/* Default read_oob implementation */ +int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Default read_oob syndrome implementation */ +int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 75eb2cec251fda33c9bb716ecc372819abb9278a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 4 Feb 2016 09:52:30 +0100 Subject: mtd: add mtd_ooblayout_xxx() helper functions In order to make the ecclayout definition completely dynamic we need to rework the way the OOB layout are defined and iterated. Create a few mtd_ooblayout_xxx() helpers to ease OOB bytes manipulation and hide ecclayout internals to their users. Signed-off-by: Boris Brezillon --- drivers/mtd/mtdcore.c | 400 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/mtd.h | 33 ++++ 2 files changed, 433 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index bee180bd11e7..0290c41e44fc 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1016,6 +1016,406 @@ int mtd_write_oob(struct mtd_info *mtd, loff_t to, } EXPORT_SYMBOL_GPL(mtd_write_oob); +/** + * mtd_ooblayout_ecc - Get the OOB region definition of a specific ECC section + * @mtd: MTD device structure + * @section: ECC section. Depending on the layout you may have all the ECC + * bytes stored in a single contiguous section, or one section + * per ECC chunk (and sometime several sections for a single ECC + * ECC chunk) + * @oobecc: OOB region struct filled with the appropriate ECC position + * information + * + * This functions return ECC section information in the OOB area. I you want + * to get all the ECC bytes information, then you should call + * mtd_ooblayout_ecc(mtd, section++, oobecc) until it returns -ERANGE. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobecc) +{ + int eccbyte = 0, cursection = 0, length = 0, eccpos = 0; + + memset(oobecc, 0, sizeof(*oobecc)); + + if (!mtd || section < 0) + return -EINVAL; + + if (!mtd->ecclayout) + return -ENOTSUPP; + + /* + * This logic allows us to reuse the ->ecclayout information and + * expose them as ECC regions (as done for the OOB free regions). + * + * TODO: this should be dropped as soon as we get rid of the + * ->ecclayout field. + */ + for (eccbyte = 0; eccbyte < mtd->ecclayout->eccbytes; eccbyte++) { + eccpos = mtd->ecclayout->eccpos[eccbyte]; + + if (eccbyte < mtd->ecclayout->eccbytes - 1) { + int neccpos = mtd->ecclayout->eccpos[eccbyte + 1]; + + if (eccpos + 1 == neccpos) { + length++; + continue; + } + } + + if (section == cursection) + break; + + length = 0; + cursection++; + } + + if (cursection != section || eccbyte >= mtd->ecclayout->eccbytes) + return -ERANGE; + + oobecc->length = length + 1; + oobecc->offset = eccpos - length; + + return 0; +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_ecc); + +/** + * mtd_ooblayout_free - Get the OOB region definition of a specific free + * section + * @mtd: MTD device structure + * @section: Free section you are interested in. Depending on the layout + * you may have all the free bytes stored in a single contiguous + * section, or one section per ECC chunk plus an extra section + * for the remaining bytes (or other funky layout). + * @oobfree: OOB region struct filled with the appropriate free position + * information + * + * This functions return free bytes position in the OOB area. I you want + * to get all the free bytes information, then you should call + * mtd_ooblayout_free(mtd, section++, oobfree) until it returns -ERANGE. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobfree) +{ + memset(oobfree, 0, sizeof(*oobfree)); + + if (!mtd || section < 0) + return -EINVAL; + + if (!mtd->ecclayout) + return -ENOTSUPP; + + if (section >= MTD_MAX_OOBFREE_ENTRIES_LARGE) + return -ERANGE; + + oobfree->offset = mtd->ecclayout->oobfree[section].offset; + oobfree->length = mtd->ecclayout->oobfree[section].length; + + return 0; +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_free); + +/** + * mtd_ooblayout_find_region - Find the region attached to a specific byte + * @mtd: mtd info structure + * @byte: the byte we are searching for + * @sectionp: pointer where the section id will be stored + * @oobregion: used to retrieve the ECC position + * @iter: iterator function. Should be either mtd_ooblayout_free or + * mtd_ooblayout_ecc depending on the region type you're searching for + * + * This functions returns the section id and oobregion information of a + * specific byte. For example, say you want to know where the 4th ECC byte is + * stored, you'll use: + * + * mtd_ooblayout_find_region(mtd, 3, §ion, &oobregion, mtd_ooblayout_ecc); + * + * Returns zero on success, a negative error code otherwise. + */ +static int mtd_ooblayout_find_region(struct mtd_info *mtd, int byte, + int *sectionp, struct mtd_oob_region *oobregion, + int (*iter)(struct mtd_info *, + int section, + struct mtd_oob_region *oobregion)) +{ + int pos = 0, ret, section = 0; + + memset(oobregion, 0, sizeof(*oobregion)); + + while (1) { + ret = iter(mtd, section, oobregion); + if (ret) + return ret; + + if (pos + oobregion->length > byte) + break; + + pos += oobregion->length; + section++; + } + + /* + * Adjust region info to make it start at the beginning at the + * 'start' ECC byte. + */ + oobregion->offset += byte - pos; + oobregion->length -= byte - pos; + *sectionp = section; + + return 0; +} + +/** + * mtd_ooblayout_find_eccregion - Find the ECC region attached to a specific + * ECC byte + * @mtd: mtd info structure + * @eccbyte: the byte we are searching for + * @sectionp: pointer where the section id will be stored + * @oobregion: OOB region information + * + * Works like mtd_ooblayout_find_region() except it searches for a specific ECC + * byte. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, + int *section, + struct mtd_oob_region *oobregion) +{ + return mtd_ooblayout_find_region(mtd, eccbyte, section, oobregion, + mtd_ooblayout_ecc); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_find_eccregion); + +/** + * mtd_ooblayout_get_bytes - Extract OOB bytes from the oob buffer + * @mtd: mtd info structure + * @buf: destination buffer to store OOB bytes + * @oobbuf: OOB buffer + * @start: first byte to retrieve + * @nbytes: number of bytes to retrieve + * @iter: section iterator + * + * Extract bytes attached to a specific category (ECC or free) + * from the OOB buffer and copy them into buf. + * + * Returns zero on success, a negative error code otherwise. + */ +static int mtd_ooblayout_get_bytes(struct mtd_info *mtd, u8 *buf, + const u8 *oobbuf, int start, int nbytes, + int (*iter)(struct mtd_info *, + int section, + struct mtd_oob_region *oobregion)) +{ + struct mtd_oob_region oobregion = { }; + int section = 0, ret; + + ret = mtd_ooblayout_find_region(mtd, start, §ion, + &oobregion, iter); + + while (!ret) { + int cnt; + + cnt = oobregion.length > nbytes ? nbytes : oobregion.length; + memcpy(buf, oobbuf + oobregion.offset, cnt); + buf += cnt; + nbytes -= cnt; + + if (!nbytes) + break; + + ret = iter(mtd, ++section, &oobregion); + } + + return ret; +} + +/** + * mtd_ooblayout_set_bytes - put OOB bytes into the oob buffer + * @mtd: mtd info structure + * @buf: source buffer to get OOB bytes from + * @oobbuf: OOB buffer + * @start: first OOB byte to set + * @nbytes: number of OOB bytes to set + * @iter: section iterator + * + * Fill the OOB buffer with data provided in buf. The category (ECC or free) + * is selected by passing the appropriate iterator. + * + * Returns zero on success, a negative error code otherwise. + */ +static int mtd_ooblayout_set_bytes(struct mtd_info *mtd, const u8 *buf, + u8 *oobbuf, int start, int nbytes, + int (*iter)(struct mtd_info *, + int section, + struct mtd_oob_region *oobregion)) +{ + struct mtd_oob_region oobregion = { }; + int section = 0, ret; + + ret = mtd_ooblayout_find_region(mtd, start, §ion, + &oobregion, iter); + + while (!ret) { + int cnt; + + cnt = oobregion.length > nbytes ? nbytes : oobregion.length; + memcpy(oobbuf + oobregion.offset, buf, cnt); + buf += cnt; + nbytes -= cnt; + + if (!nbytes) + break; + + ret = iter(mtd, ++section, &oobregion); + } + + return ret; +} + +/** + * mtd_ooblayout_count_bytes - count the number of bytes in a OOB category + * @mtd: mtd info structure + * @iter: category iterator + * + * Count the number of bytes in a given category. + * + * Returns a positive value on success, a negative error code otherwise. + */ +static int mtd_ooblayout_count_bytes(struct mtd_info *mtd, + int (*iter)(struct mtd_info *, + int section, + struct mtd_oob_region *oobregion)) +{ + struct mtd_oob_region oobregion = { }; + int section = 0, ret, nbytes = 0; + + while (1) { + ret = iter(mtd, section++, &oobregion); + if (ret) { + if (ret == -ERANGE) + ret = nbytes; + break; + } + + nbytes += oobregion.length; + } + + return ret; +} + +/** + * mtd_ooblayout_get_eccbytes - extract ECC bytes from the oob buffer + * @mtd: mtd info structure + * @eccbuf: destination buffer to store ECC bytes + * @oobbuf: OOB buffer + * @start: first ECC byte to retrieve + * @nbytes: number of ECC bytes to retrieve + * + * Works like mtd_ooblayout_get_bytes(), except it acts on ECC bytes. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_get_eccbytes(struct mtd_info *mtd, u8 *eccbuf, + const u8 *oobbuf, int start, int nbytes) +{ + return mtd_ooblayout_get_bytes(mtd, eccbuf, oobbuf, start, nbytes, + mtd_ooblayout_ecc); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_get_eccbytes); + +/** + * mtd_ooblayout_set_eccbytes - set ECC bytes into the oob buffer + * @mtd: mtd info structure + * @eccbuf: source buffer to get ECC bytes from + * @oobbuf: OOB buffer + * @start: first ECC byte to set + * @nbytes: number of ECC bytes to set + * + * Works like mtd_ooblayout_set_bytes(), except it acts on ECC bytes. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_set_eccbytes(struct mtd_info *mtd, const u8 *eccbuf, + u8 *oobbuf, int start, int nbytes) +{ + return mtd_ooblayout_set_bytes(mtd, eccbuf, oobbuf, start, nbytes, + mtd_ooblayout_ecc); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_set_eccbytes); + +/** + * mtd_ooblayout_get_databytes - extract data bytes from the oob buffer + * @mtd: mtd info structure + * @databuf: destination buffer to store ECC bytes + * @oobbuf: OOB buffer + * @start: first ECC byte to retrieve + * @nbytes: number of ECC bytes to retrieve + * + * Works like mtd_ooblayout_get_bytes(), except it acts on free bytes. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_get_databytes(struct mtd_info *mtd, u8 *databuf, + const u8 *oobbuf, int start, int nbytes) +{ + return mtd_ooblayout_get_bytes(mtd, databuf, oobbuf, start, nbytes, + mtd_ooblayout_free); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_get_databytes); + +/** + * mtd_ooblayout_get_eccbytes - set data bytes into the oob buffer + * @mtd: mtd info structure + * @eccbuf: source buffer to get data bytes from + * @oobbuf: OOB buffer + * @start: first ECC byte to set + * @nbytes: number of ECC bytes to set + * + * Works like mtd_ooblayout_get_bytes(), except it acts on free bytes. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, + u8 *oobbuf, int start, int nbytes) +{ + return mtd_ooblayout_set_bytes(mtd, databuf, oobbuf, start, nbytes, + mtd_ooblayout_free); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_set_databytes); + +/** + * mtd_ooblayout_count_freebytes - count the number of free bytes in OOB + * @mtd: mtd info structure + * + * Works like mtd_ooblayout_count_bytes(), except it count free bytes. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_count_freebytes(struct mtd_info *mtd) +{ + return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_free); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_count_freebytes); + +/** + * mtd_ooblayout_count_freebytes - count the number of ECC bytes in OOB + * @mtd: mtd info structure + * + * Works like mtd_ooblayout_count_bytes(), except it count ECC bytes. + * + * Returns zero on success, a negative error code otherwise. + */ +int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd) +{ + return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_ecc); +} +EXPORT_SYMBOL_GPL(mtd_ooblayout_count_eccbytes); + /* * Method to access the protection register area, present in some flash * devices. The user data is one time programmable but the factory data is read diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index ef9fea4fc400..117ca1ff581d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -108,6 +108,21 @@ struct nand_ecclayout { struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES_LARGE]; }; +/** + * struct mtd_oob_region - oob region definition + * @offset: region offset + * @length: region length + * + * This structure describes a region of the OOB area, and is used + * to retrieve ECC or free bytes sections. + * Each section is defined by an offset within the OOB area and a + * length. + */ +struct mtd_oob_region { + u32 offset; + u32 length; +}; + struct module; /* only needed for owner field in mtd_info */ struct mtd_info { @@ -253,6 +268,24 @@ struct mtd_info { int usecount; }; +int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobecc); +int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, + int *section, + struct mtd_oob_region *oobregion); +int mtd_ooblayout_get_eccbytes(struct mtd_info *mtd, u8 *eccbuf, + const u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_set_eccbytes(struct mtd_info *mtd, const u8 *eccbuf, + u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobfree); +int mtd_ooblayout_get_databytes(struct mtd_info *mtd, u8 *databuf, + const u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, + u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); +int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); + static inline void mtd_set_of_node(struct mtd_info *mtd, struct device_node *np) { -- cgit v1.2.3 From 036d6543f85319ffe96afad6de73d3a220917a63 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 18:53:44 +0100 Subject: mtd: add mtd_set_ecclayout() helper function Add an mtd_set_ecclayout() helper function to avoid direct accesses to the mtd->ecclayout field. This will ease future reworks of ECC layout definition. Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 117ca1ff581d..e62da8462493 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -286,6 +286,12 @@ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); +static inline void mtd_set_ecclayout(struct mtd_info *mtd, + struct nand_ecclayout *ecclayout) +{ + mtd->ecclayout = ecclayout; +} + static inline void mtd_set_of_node(struct mtd_info *mtd, struct device_node *np) { -- cgit v1.2.3 From adbbc3bc827eb1f43a932d783f09ba55c8ec8379 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 19:01:31 +0100 Subject: mtd: create an mtd_ooblayout_ops struct to ease ECC layout definition ECC layout definitions are currently exposed using the nand_ecclayout struct which embeds oobfree and eccpos arrays with predefined size. This approach was acceptable when NAND chips were providing relatively small OOB regions, but MLC and TLC now provide OOB regions of several hundreds of bytes, which implies a non negligible overhead for everybody even those who only need to support legacy NANDs. Create an mtd_ooblayout_ops interface providing the same functionality (expose the ECC and oobfree layout) without the need for this huge structure. The mtd->ecclayout is now deprecated and should be replaced by the equivalent mtd_ooblayout_ops. In the meantime we provide a wrapper around the ->ecclayout field to ease migration to this new model. Signed-off-by: Boris Brezillon --- drivers/mtd/mtdchar.c | 4 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdcore.c | 165 +++++++++++++++++++++++++++++++++++------------- drivers/mtd/mtdpart.c | 23 ++++++- include/linux/mtd/mtd.h | 32 ++++++++-- 5 files changed, 174 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index cd64ab76dd7b..3fad2c7425b0 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -888,7 +888,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) { struct nand_oobinfo oi; - if (!mtd->ecclayout) + if (!mtd->ooblayout) return -EOPNOTSUPP; ret = get_oobinfo(mtd, &oi); @@ -982,7 +982,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) { struct nand_ecclayout_user *usrlay; - if (!mtd->ecclayout) + if (!mtd->ooblayout) return -EOPNOTSUPP; usrlay = kmalloc(sizeof(*usrlay), GFP_KERNEL); diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 481565e5fbaa..d573606b91c2 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -777,7 +777,7 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c } - mtd_set_ecclayout(&concat->mtd, subdev[0]->ecclayout); + mtd_set_ooblayout(&concat->mtd, subdev[0]->ooblayout); concat->num_subdev = num_devs; concat->mtd.name = name; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 0290c41e44fc..134ed2f7b919 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1035,49 +1035,15 @@ EXPORT_SYMBOL_GPL(mtd_write_oob); int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobecc) { - int eccbyte = 0, cursection = 0, length = 0, eccpos = 0; - memset(oobecc, 0, sizeof(*oobecc)); if (!mtd || section < 0) return -EINVAL; - if (!mtd->ecclayout) + if (!mtd->ooblayout || !mtd->ooblayout->ecc) return -ENOTSUPP; - /* - * This logic allows us to reuse the ->ecclayout information and - * expose them as ECC regions (as done for the OOB free regions). - * - * TODO: this should be dropped as soon as we get rid of the - * ->ecclayout field. - */ - for (eccbyte = 0; eccbyte < mtd->ecclayout->eccbytes; eccbyte++) { - eccpos = mtd->ecclayout->eccpos[eccbyte]; - - if (eccbyte < mtd->ecclayout->eccbytes - 1) { - int neccpos = mtd->ecclayout->eccpos[eccbyte + 1]; - - if (eccpos + 1 == neccpos) { - length++; - continue; - } - } - - if (section == cursection) - break; - - length = 0; - cursection++; - } - - if (cursection != section || eccbyte >= mtd->ecclayout->eccbytes) - return -ERANGE; - - oobecc->length = length + 1; - oobecc->offset = eccpos - length; - - return 0; + return mtd->ooblayout->ecc(mtd, section, oobecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_ecc); @@ -1106,16 +1072,10 @@ int mtd_ooblayout_free(struct mtd_info *mtd, int section, if (!mtd || section < 0) return -EINVAL; - if (!mtd->ecclayout) + if (!mtd->ooblayout || !mtd->ooblayout->free) return -ENOTSUPP; - if (section >= MTD_MAX_OOBFREE_ENTRIES_LARGE) - return -ERANGE; - - oobfree->offset = mtd->ecclayout->oobfree[section].offset; - oobfree->length = mtd->ecclayout->oobfree[section].length; - - return 0; + return mtd->ooblayout->free(mtd, section, oobfree); } EXPORT_SYMBOL_GPL(mtd_ooblayout_free); @@ -1416,6 +1376,123 @@ int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd) } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_eccbytes); +/** + * mtd_ecclayout_ecc - Default ooblayout_ecc iterator implementation + * @mtd: MTD device structure + * @section: ECC section. Depending on the layout you may have all the ECC + * bytes stored in a single contiguous section, or one section + * per ECC chunk (and sometime several sections for a single ECC + * ECC chunk) + * @oobecc: OOB region struct filled with the appropriate ECC position + * information + * + * This function is just a wrapper around the mtd->ecclayout field and is + * here to ease the transition to the mtd_ooblayout_ops approach. + * All it does is convert the layout->eccpos information into proper oob + * region definitions. + * + * Returns zero on success, a negative error code otherwise. + */ +static int mtd_ecclayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobecc) +{ + int eccbyte = 0, cursection = 0, length = 0, eccpos = 0; + + if (!mtd->ecclayout) + return -ENOTSUPP; + + /* + * This logic allows us to reuse the ->ecclayout information and + * expose them as ECC regions (as done for the OOB free regions). + * + * TODO: this should be dropped as soon as we get rid of the + * ->ecclayout field. + */ + for (eccbyte = 0; eccbyte < mtd->ecclayout->eccbytes; eccbyte++) { + eccpos = mtd->ecclayout->eccpos[eccbyte]; + + if (eccbyte < mtd->ecclayout->eccbytes - 1) { + int neccpos = mtd->ecclayout->eccpos[eccbyte + 1]; + + if (eccpos + 1 == neccpos) { + length++; + continue; + } + } + + if (section == cursection) + break; + + length = 0; + cursection++; + } + + if (cursection != section || eccbyte >= mtd->ecclayout->eccbytes) + return -ERANGE; + + oobecc->length = length + 1; + oobecc->offset = eccpos - length; + + return 0; +} + +/** + * mtd_ecclayout_ecc - Default ooblayout_free iterator implementation + * @mtd: MTD device structure + * @section: Free section. Depending on the layout you may have all the free + * bytes stored in a single contiguous section, or one section + * per ECC chunk (and sometime several sections for a single ECC + * ECC chunk) + * @oobfree: OOB region struct filled with the appropriate free position + * information + * + * This function is just a wrapper around the mtd->ecclayout field and is + * here to ease the transition to the mtd_ooblayout_ops approach. + * All it does is convert the layout->oobfree information into proper oob + * region definitions. + * + * Returns zero on success, a negative error code otherwise. + */ +static int mtd_ecclayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobfree) +{ + struct nand_ecclayout *layout = mtd->ecclayout; + + if (!layout) + return -ENOTSUPP; + + if (section >= MTD_MAX_OOBFREE_ENTRIES_LARGE || + !layout->oobfree[section].length) + return -ERANGE; + + oobfree->offset = layout->oobfree[section].offset; + oobfree->length = layout->oobfree[section].length; + + return 0; +} + +static const struct mtd_ooblayout_ops mtd_ecclayout_wrapper_ops = { + .ecc = mtd_ecclayout_ecc, + .free = mtd_ecclayout_free, +}; + +/** + * mtd_set_ecclayout - Attach an ecclayout to an MTD device + * @mtd: MTD device structure + * @ecclayout: The ecclayout to attach to the device + * + * Returns zero on success, a negative error code otherwise. + */ +void mtd_set_ecclayout(struct mtd_info *mtd, struct nand_ecclayout *ecclayout) +{ + if (!mtd || !ecclayout) + return; + + mtd->ecclayout = ecclayout; + mtd_set_ooblayout(mtd, &mtd_ecclayout_wrapper_ops); +} +EXPORT_SYMBOL_GPL(mtd_set_ecclayout); + /* * Method to access the protection register area, present in some flash * devices. The user data is one time programmable but the factory data is read diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index f53d9d72b23a..1f13e32556f8 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -317,6 +317,27 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs) return res; } +static int part_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct mtd_part *part = mtd_to_part(mtd); + + return mtd_ooblayout_ecc(part->master, section, oobregion); +} + +static int part_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct mtd_part *part = mtd_to_part(mtd); + + return mtd_ooblayout_free(part->master, section, oobregion); +} + +static const struct mtd_ooblayout_ops part_ooblayout_ops = { + .ecc = part_ooblayout_ecc, + .free = part_ooblayout_free, +}; + static inline void free_partition(struct mtd_part *p) { kfree(p->mtd.name); @@ -533,7 +554,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master, part->name); } - mtd_set_ecclayout(&slave->mtd, master->ecclayout); + mtd_set_ooblayout(&slave->mtd, &part_ooblayout_ops); slave->mtd.ecc_step_size = master->ecc_step_size; slave->mtd.ecc_strength = master->ecc_strength; slave->mtd.bitflip_threshold = master->bitflip_threshold; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index e62da8462493..177bf314ad70 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -101,6 +101,9 @@ struct mtd_oob_ops { * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained * for export to user-space via the ECCGETLAYOUT ioctl. * nand_ecclayout should be expandable in the future simply by the above macros. + * + * This structure is now deprecated, you should use struct nand_ecclayout_ops + * to describe your OOB layout. */ struct nand_ecclayout { __u32 eccbytes; @@ -123,6 +126,22 @@ struct mtd_oob_region { u32 length; }; +/* + * struct mtd_ooblayout_ops - NAND OOB layout operations + * @ecc: function returning an ECC region in the OOB area. + * Should return -ERANGE if %section exceeds the total number of + * ECC sections. + * @free: function returning a free region in the OOB area. + * Should return -ERANGE if %section exceeds the total number of + * free sections. + */ +struct mtd_ooblayout_ops { + int (*ecc)(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobecc); + int (*free)(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobfree); +}; + struct module; /* only needed for owner field in mtd_info */ struct mtd_info { @@ -181,9 +200,12 @@ struct mtd_info { const char *name; int index; - /* ECC layout structure pointer - read only! */ + /* [Deprecated] ECC layout structure pointer - read only! */ struct nand_ecclayout *ecclayout; + /* OOB layout description */ + const struct mtd_ooblayout_ops *ooblayout; + /* the ecc step size. */ unsigned int ecc_step_size; @@ -286,10 +308,12 @@ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); -static inline void mtd_set_ecclayout(struct mtd_info *mtd, - struct nand_ecclayout *ecclayout) +void mtd_set_ecclayout(struct mtd_info *mtd, struct nand_ecclayout *ecclayout); + +static inline void mtd_set_ooblayout(struct mtd_info *mtd, + const struct mtd_ooblayout_ops *ooblayout) { - mtd->ecclayout = ecclayout; + mtd->ooblayout = ooblayout; } static inline void mtd_set_of_node(struct mtd_info *mtd, -- cgit v1.2.3 From 41b207a70d3a86b9e2eede155e87838234c7cbd5 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 19:06:15 +0100 Subject: mtd: nand: implement the default mtd_ooblayout_ops Replace the default nand_ecclayout definitions for large and small page devices with the equivalent mtd_ooblayout_ops. Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 142 ++++++++++++++++++++++++++++--------------- include/linux/mtd/nand.h | 3 + 2 files changed, 96 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index feb1448e9dd6..e8332ea45739 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -47,54 +47,96 @@ #include #include +static int nand_get_device(struct mtd_info *mtd, int new_state); + +static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, + struct mtd_oob_ops *ops); + /* Define default oob placement schemes for large and small page devices */ -static struct nand_ecclayout nand_oob_8 = { - .eccbytes = 3, - .eccpos = {0, 1, 2}, - .oobfree = { - {.offset = 3, - .length = 2}, - {.offset = 6, - .length = 2} } -}; +static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_ecc_ctrl *ecc = &chip->ecc; -static struct nand_ecclayout nand_oob_16 = { - .eccbytes = 6, - .eccpos = {0, 1, 2, 3, 6, 7}, - .oobfree = { - {.offset = 8, - . length = 8} } -}; + if (section > 1) + return -ERANGE; -static struct nand_ecclayout nand_oob_64 = { - .eccbytes = 24, - .eccpos = { - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63}, - .oobfree = { - {.offset = 2, - .length = 38} } -}; + if (!section) { + oobregion->offset = 0; + oobregion->length = 4; + } else { + oobregion->offset = 6; + oobregion->length = ecc->total - 4; + } -static struct nand_ecclayout nand_oob_128 = { - .eccbytes = 48, - .eccpos = { - 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127}, - .oobfree = { - {.offset = 2, - .length = 78} } + return 0; +} + +static int nand_ooblayout_free_sp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + if (section > 1) + return -ERANGE; + + if (mtd->oobsize == 16) { + if (section) + return -ERANGE; + + oobregion->length = 8; + oobregion->offset = 8; + } else { + oobregion->length = 2; + if (!section) + oobregion->offset = 3; + else + oobregion->offset = 6; + } + + return 0; +} + +const struct mtd_ooblayout_ops nand_ooblayout_sp_ops = { + .ecc = nand_ooblayout_ecc_sp, + .free = nand_ooblayout_free_sp, }; +EXPORT_SYMBOL_GPL(nand_ooblayout_sp_ops); -static int nand_get_device(struct mtd_info *mtd, int new_state); +static int nand_ooblayout_ecc_lp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_ecc_ctrl *ecc = &chip->ecc; -static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, - struct mtd_oob_ops *ops); + if (section) + return -ERANGE; + + oobregion->length = ecc->total; + oobregion->offset = mtd->oobsize - oobregion->length; + + return 0; +} + +static int nand_ooblayout_free_lp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_ecc_ctrl *ecc = &chip->ecc; + + if (section) + return -ERANGE; + + oobregion->length = mtd->oobsize - ecc->total - 2; + oobregion->offset = 2; + + return 0; +} + +const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = { + .ecc = nand_ooblayout_ecc_lp, + .free = nand_ooblayout_free_lp, +}; +EXPORT_SYMBOL_GPL(nand_ooblayout_lp_ops); static int check_offs_len(struct mtd_info *mtd, loff_t ofs, uint64_t len) @@ -4109,22 +4151,25 @@ int nand_scan_tail(struct mtd_info *mtd) /* Set the internal oob buffer location, just after the page data */ chip->oob_poi = chip->buffers->databuf + mtd->writesize; + /* + * Set the provided ECC layout. If ecc->layout is NULL, the MTD core + * will just leave mtd->ooblayout to NULL, if it's not NULL, it will + * set ->ooblayout to the default ecclayout wrapper. + */ + mtd_set_ecclayout(mtd, ecc->layout); + /* * If no default placement scheme is given, select an appropriate one. */ - if (!ecc->layout && (ecc->mode != NAND_ECC_SOFT_BCH)) { + if (!mtd->ooblayout && (ecc->mode != NAND_ECC_SOFT_BCH)) { switch (mtd->oobsize) { case 8: - ecc->layout = &nand_oob_8; - break; case 16: - ecc->layout = &nand_oob_16; + mtd_set_ooblayout(mtd, &nand_ooblayout_sp_ops); break; case 64: - ecc->layout = &nand_oob_64; - break; case 128: - ecc->layout = &nand_oob_128; + mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); break; default: WARN(1, "No oob scheme defined for oobsize %d\n", @@ -4285,7 +4330,6 @@ int nand_scan_tail(struct mtd_info *mtd) ecc->write_oob_raw = ecc->write_oob; /* propagate ecc info to mtd_info */ - mtd_set_ecclayout(mtd, ecc->layout); mtd->ecc_strength = ecc->strength; mtd->ecc_step_size = ecc->size; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7e06afb8552c..f2ded7b1b3b8 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -748,6 +748,9 @@ struct nand_chip { void *priv; }; +extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; +extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; + static inline void nand_set_flash_node(struct nand_chip *chip, struct device_node *np) { -- cgit v1.2.3 From 74e1fbb1375a3ede3e17da22911761ce9bc8f53f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:17 +0200 Subject: of: Introduce struct of_phandle_iterator This struct carrys all necessary information to iterate over a list of phandles and extract the arguments. Add an init-function for the iterator and make use of it in __of_parse_phandle_with_args(). Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- drivers/of/base.c | 99 +++++++++++++++++++++++++++++++++--------------------- include/linux/of.h | 33 ++++++++++++++++++ 2 files changed, 93 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index b299de2b3afa..1c6f43b5737d 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1440,35 +1440,56 @@ void of_print_phandle_args(const char *msg, const struct of_phandle_args *args) printk("\n"); } +int of_phandle_iterator_init(struct of_phandle_iterator *it, + const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count) +{ + const __be32 *list; + int size; + + memset(it, 0, sizeof(*it)); + + list = of_get_property(np, list_name, &size); + if (!list) + return -ENOENT; + + it->cells_name = cells_name; + it->cell_count = cell_count; + it->parent = np; + it->list_end = list + size / sizeof(*list); + it->phandle_end = list; + it->cur = list; + + return 0; +} + static int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args) { - const __be32 *list, *list_end; - int rc = 0, size, cur_index = 0; - uint32_t count = 0; - struct device_node *node = NULL; - phandle phandle; + struct of_phandle_iterator it; + int rc, cur_index = 0; - /* Retrieve the phandle list property */ - list = of_get_property(np, list_name, &size); - if (!list) - return -ENOENT; - list_end = list + size / sizeof(*list); + rc = of_phandle_iterator_init(&it, np, list_name, + cells_name, cell_count); + if (rc) + return rc; /* Loop over the phandles until all the requested entry is found */ - while (list < list_end) { + while (it.cur < it.list_end) { rc = -EINVAL; - count = 0; + it.cur_count = 0; /* * If phandle is 0, then it is an empty entry with no * arguments. Skip forward to the next entry. */ - phandle = be32_to_cpup(list++); - if (phandle) { + it.phandle = be32_to_cpup(it.cur++); + if (it.phandle) { /* * Find the provider node and parse the #*-cells * property to determine the argument length. @@ -1478,34 +1499,34 @@ static int __of_parse_phandle_with_args(const struct device_node *np, * except when we're going to return the found node * below. */ - if (cells_name || cur_index == index) { - node = of_find_node_by_phandle(phandle); - if (!node) { + if (it.cells_name || cur_index == index) { + it.node = of_find_node_by_phandle(it.phandle); + if (!it.node) { pr_err("%s: could not find phandle\n", - np->full_name); + it.parent->full_name); goto err; } } - if (cells_name) { - if (of_property_read_u32(node, cells_name, - &count)) { + if (it.cells_name) { + if (of_property_read_u32(it.node, it.cells_name, + &it.cur_count)) { pr_err("%s: could not get %s for %s\n", - np->full_name, cells_name, - node->full_name); + it.parent->full_name, it.cells_name, + it.node->full_name); goto err; } } else { - count = cell_count; + it.cur_count = it.cell_count; } /* * Make sure that the arguments actually fit in the * remaining property data length */ - if (list + count > list_end) { + if (it.cur + it.cur_count > it.list_end) { pr_err("%s: arguments longer than property\n", - np->full_name); + it.parent->full_name); goto err; } } @@ -1518,28 +1539,28 @@ static int __of_parse_phandle_with_args(const struct device_node *np, */ rc = -ENOENT; if (cur_index == index) { - if (!phandle) + if (!it.phandle) goto err; if (out_args) { int i; - if (WARN_ON(count > MAX_PHANDLE_ARGS)) - count = MAX_PHANDLE_ARGS; - out_args->np = node; - out_args->args_count = count; - for (i = 0; i < count; i++) - out_args->args[i] = be32_to_cpup(list++); + if (WARN_ON(it.cur_count > MAX_PHANDLE_ARGS)) + it.cur_count = MAX_PHANDLE_ARGS; + out_args->np = it.node; + out_args->args_count = it.cur_count; + for (i = 0; i < it.cur_count; i++) + out_args->args[i] = be32_to_cpup(it.cur++); } else { - of_node_put(node); + of_node_put(it.node); } /* Found it! return success */ return 0; } - of_node_put(node); - node = NULL; - list += count; + of_node_put(it.node); + it.node = NULL; + it.cur += it.cur_count; cur_index++; } @@ -1551,8 +1572,8 @@ static int __of_parse_phandle_with_args(const struct device_node *np, */ rc = index < 0 ? cur_index : -ENOENT; err: - if (node) - of_node_put(node); + if (it.node) + of_node_put(it.node); return rc; } diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..0f187dbb890b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -75,6 +75,23 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; +struct of_phandle_iterator { + /* Common iterator information */ + const char *cells_name; + int cell_count; + const struct device_node *parent; + + /* List size information */ + const __be32 *list_end; + const __be32 *phandle_end; + + /* Current position state */ + const __be32 *cur; + uint32_t cur_count; + phandle phandle; + struct device_node *node; +}; + struct of_reconfig_data { struct device_node *dn; struct property *prop; @@ -334,6 +351,13 @@ extern int of_parse_phandle_with_fixed_args(const struct device_node *np, extern int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name); +/* phandle iterator functions */ +extern int of_phandle_iterator_init(struct of_phandle_iterator *it, + const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count); + extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); @@ -608,6 +632,15 @@ static inline int of_count_phandle_with_args(struct device_node *np, return -ENOSYS; } +static inline int of_phandle_iterator_init(struct of_phandle_iterator *it, + const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count) +{ + return -ENOSYS; +} + static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; -- cgit v1.2.3 From cd209b412c8a5d632b51af1e45576f0d00b8105f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:18 +0200 Subject: of: Move phandle walking to of_phandle_iterator_next() Move the code to walk over the phandles out of the loop in __of_parse_phandle_with_args() to a separate function that just works with the iterator handle: of_phandle_iterator_next(). Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- drivers/of/base.c | 130 ++++++++++++++++++++++++++++++----------------------- include/linux/of.h | 7 +++ 2 files changed, 81 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 1c6f43b5737d..69286ec206f7 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1465,6 +1465,75 @@ int of_phandle_iterator_init(struct of_phandle_iterator *it, return 0; } +int of_phandle_iterator_next(struct of_phandle_iterator *it) +{ + uint32_t count = 0; + + if (it->node) { + of_node_put(it->node); + it->node = NULL; + } + + if (!it->cur || it->phandle_end >= it->list_end) + return -ENOENT; + + it->cur = it->phandle_end; + + /* If phandle is 0, then it is an empty entry with no arguments. */ + it->phandle = be32_to_cpup(it->cur++); + + if (it->phandle) { + + /* + * Find the provider node and parse the #*-cells property to + * determine the argument length. + */ + it->node = of_find_node_by_phandle(it->phandle); + + if (it->cells_name) { + if (!it->node) { + pr_err("%s: could not find phandle\n", + it->parent->full_name); + goto err; + } + + if (of_property_read_u32(it->node, it->cells_name, + &count)) { + pr_err("%s: could not get %s for %s\n", + it->parent->full_name, + it->cells_name, + it->node->full_name); + goto err; + } + } else { + count = it->cell_count; + } + + /* + * Make sure that the arguments actually fit in the remaining + * property data length + */ + if (it->cur + count > it->list_end) { + pr_err("%s: arguments longer than property\n", + it->parent->full_name); + goto err; + } + } + + it->phandle_end = it->cur + count; + it->cur_count = count; + + return 0; + +err: + if (it->node) { + of_node_put(it->node); + it->node = NULL; + } + + return -EINVAL; +} + static int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, @@ -1480,59 +1549,9 @@ static int __of_parse_phandle_with_args(const struct device_node *np, return rc; /* Loop over the phandles until all the requested entry is found */ - while (it.cur < it.list_end) { - rc = -EINVAL; - it.cur_count = 0; - - /* - * If phandle is 0, then it is an empty entry with no - * arguments. Skip forward to the next entry. - */ - it.phandle = be32_to_cpup(it.cur++); - if (it.phandle) { - /* - * Find the provider node and parse the #*-cells - * property to determine the argument length. - * - * This is not needed if the cell count is hard-coded - * (i.e. cells_name not set, but cell_count is set), - * except when we're going to return the found node - * below. - */ - if (it.cells_name || cur_index == index) { - it.node = of_find_node_by_phandle(it.phandle); - if (!it.node) { - pr_err("%s: could not find phandle\n", - it.parent->full_name); - goto err; - } - } - - if (it.cells_name) { - if (of_property_read_u32(it.node, it.cells_name, - &it.cur_count)) { - pr_err("%s: could not get %s for %s\n", - it.parent->full_name, it.cells_name, - it.node->full_name); - goto err; - } - } else { - it.cur_count = it.cell_count; - } - - /* - * Make sure that the arguments actually fit in the - * remaining property data length - */ - if (it.cur + it.cur_count > it.list_end) { - pr_err("%s: arguments longer than property\n", - it.parent->full_name); - goto err; - } - } - + while ((rc = of_phandle_iterator_next(&it)) == 0) { /* - * All of the error cases above bail out of the loop, so at + * All of the error cases bail out of the loop, so at * this point, the parsing is successful. If the requested * index matches, then fill the out_args structure and return, * or return -ENOENT for an empty entry. @@ -1558,9 +1577,6 @@ static int __of_parse_phandle_with_args(const struct device_node *np, return 0; } - of_node_put(it.node); - it.node = NULL; - it.cur += it.cur_count; cur_index++; } @@ -1570,7 +1586,9 @@ static int __of_parse_phandle_with_args(const struct device_node *np, * -EINVAL : parsing error on data * [1..n] : Number of phandle (count mode; when index = -1) */ - rc = index < 0 ? cur_index : -ENOENT; + if (rc == -ENOENT && index < 0) + rc = cur_index; + err: if (it.node) of_node_put(it.node); diff --git a/include/linux/of.h b/include/linux/of.h index 0f187dbb890b..1f5e108f6716 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -358,6 +358,8 @@ extern int of_phandle_iterator_init(struct of_phandle_iterator *it, const char *cells_name, int cell_count); +extern int of_phandle_iterator_next(struct of_phandle_iterator *it); + extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); @@ -641,6 +643,11 @@ static inline int of_phandle_iterator_init(struct of_phandle_iterator *it, return -ENOSYS; } +static inline int of_phandle_iterator_next(struct of_phandle_iterator *it) +{ + return -ENOSYS; +} + static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; -- cgit v1.2.3 From f623ce95a51baee6a6638f0b025efc0229a9ac0d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:20 +0200 Subject: of: Introduce of_for_each_phandle() helper macro With this macro any user can easily iterate over a list of phandles. The patch also converts __of_parse_phandle_with_args() to make use of the macro. The of_count_phandle_with_args() function is not converted, because the macro hides the return value of of_phandle_iterator_init(), which is needed in there. Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- drivers/of/base.c | 7 +------ include/linux/of.h | 6 ++++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index fcff2b62ec10..ea5a13d3c5a5 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1543,13 +1543,8 @@ static int __of_parse_phandle_with_args(const struct device_node *np, struct of_phandle_iterator it; int rc, cur_index = 0; - rc = of_phandle_iterator_init(&it, np, list_name, - cells_name, cell_count); - if (rc) - return rc; - /* Loop over the phandles until all the requested entry is found */ - while ((rc = of_phandle_iterator_next(&it)) == 0) { + of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) { /* * All of the error cases bail out of the loop, so at * this point, the parsing is successful. If the requested diff --git a/include/linux/of.h b/include/linux/of.h index 1f5e108f6716..b0b80716fbfb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -908,6 +908,12 @@ static inline int of_property_read_s32(const struct device_node *np, return of_property_read_u32(np, propname, (u32*) out_value); } +#define of_for_each_phandle(it, err, np, ln, cn, cc) \ + for (of_phandle_iterator_init((it), (np), (ln), (cn), (cc)), \ + err = of_phandle_iterator_next(it); \ + err == 0; \ + err = of_phandle_iterator_next(it)) + #define of_property_for_each_u32(np, propname, prop, p, u) \ for (prop = of_find_property(np, propname, NULL), \ p = of_prop_next_u32(prop, NULL, &u); \ -- cgit v1.2.3 From abdaa77b18480361f3565d958a2acffad268c39c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:21 +0200 Subject: of: Introduce of_phandle_iterator_args() This helper function can be used to copy the arguments of a phandle to an array. Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- drivers/of/base.c | 29 +++++++++++++++++++++++------ include/linux/of.h | 10 ++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index ea5a13d3c5a5..e87e21df19d8 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1534,6 +1534,23 @@ err: return -EINVAL; } +int of_phandle_iterator_args(struct of_phandle_iterator *it, + uint32_t *args, + int size) +{ + int i, count; + + count = it->cur_count; + + if (WARN_ON(size < count)) + count = size; + + for (i = 0; i < count; i++) + args[i] = be32_to_cpup(it->cur++); + + return count; +} + static int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, @@ -1557,13 +1574,13 @@ static int __of_parse_phandle_with_args(const struct device_node *np, goto err; if (out_args) { - int i; - if (WARN_ON(it.cur_count > MAX_PHANDLE_ARGS)) - it.cur_count = MAX_PHANDLE_ARGS; + int c; + + c = of_phandle_iterator_args(&it, + out_args->args, + MAX_PHANDLE_ARGS); out_args->np = it.node; - out_args->args_count = it.cur_count; - for (i = 0; i < it.cur_count; i++) - out_args->args[i] = be32_to_cpup(it.cur++); + out_args->args_count = c; } else { of_node_put(it.node); } diff --git a/include/linux/of.h b/include/linux/of.h index b0b80716fbfb..71e1c35a5960 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -359,6 +359,9 @@ extern int of_phandle_iterator_init(struct of_phandle_iterator *it, int cell_count); extern int of_phandle_iterator_next(struct of_phandle_iterator *it); +extern int of_phandle_iterator_args(struct of_phandle_iterator *it, + uint32_t *args, + int size); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); @@ -648,6 +651,13 @@ static inline int of_phandle_iterator_next(struct of_phandle_iterator *it) return -ENOSYS; } +static inline int of_phandle_iterator_args(struct of_phandle_iterator *it, + uint32_t *args, + int size) +{ + return 0; +} + static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; -- cgit v1.2.3 From d0bad49bb0a094a1beb06640785f95cb256b7272 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 3 Mar 2016 12:54:55 -0600 Subject: tracing: Add enable_hist/disable_hist triggers Similar to enable_event/disable_event triggers, these triggers enable and disable the aggregation of events into maps rather than enabling and disabling their writing into the trace buffer. They can be used to automatically start and stop hist triggers based on a matching filter condition. If there's a paused hist trigger on system:event, the following would start it when the filter condition was hit: # echo enable_hist:system:event [ if filter] > event/trigger And the following would disable a running system:event hist trigger: # echo disable_hist:system:event [ if filter] > event/trigger See Documentation/trace/events.txt for real examples. Link: http://lkml.kernel.org/r/f812f086e52c8b7c8ad5443487375e03c96a601f.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Tested-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 1 + kernel/trace/trace.c | 8 +++ kernel/trace/trace.h | 32 ++++++++++ kernel/trace/trace_events_hist.c | 115 ++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_trigger.c | 71 ++++++++++++---------- 5 files changed, 196 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 404603720650..5f89a5b0c7e6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -408,6 +408,7 @@ enum event_trigger_type { ETT_STACKTRACE = (1 << 2), ETT_EVENT_ENABLE = (1 << 3), ETT_EVENT_HIST = (1 << 4), + ETT_HIST_ENABLE = (1 << 5), }; extern int filter_match_preds(struct event_filter *filter, void *rec); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2238bfde799b..8430145bea12 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3807,6 +3807,10 @@ static const char readme_msg[] = "\t trigger: traceon, traceoff\n" "\t enable_event::\n" "\t disable_event::\n" +#ifdef CONFIG_HIST_TRIGGERS + "\t enable_hist::\n" + "\t disable_hist::\n" +#endif #ifdef CONFIG_STACKTRACE "\t\t stacktrace\n" #endif @@ -3867,6 +3871,10 @@ static const char readme_msg[] = "\t The 'clear' parameter will clear the contents of a running\n" "\t hist trigger and leave its current paused/active state\n" "\t unchanged.\n\n" + "\t The enable_hist and disable_hist triggers can be used to\n" + "\t have one event conditionally start and stop another event's\n" + "\t already-attached hist trigger. The syntax is analagous to\n" + "\t the enable_event and disable_event triggers.\n" #endif ; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 505f8a45f426..cab1f4bfe85b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1166,8 +1166,10 @@ extern const struct file_operations event_hist_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); +extern int register_trigger_hist_enable_disable_cmds(void); #else static inline int register_trigger_hist_cmd(void) { return 0; } +static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; } #endif extern int register_trigger_cmds(void); @@ -1185,6 +1187,34 @@ struct event_trigger_data { struct list_head list; }; +/* Avoid typos */ +#define ENABLE_EVENT_STR "enable_event" +#define DISABLE_EVENT_STR "disable_event" +#define ENABLE_HIST_STR "enable_hist" +#define DISABLE_HIST_STR "disable_hist" + +struct enable_trigger_data { + struct trace_event_file *file; + bool enable; + bool hist; +}; + +extern int event_enable_trigger_print(struct seq_file *m, + struct event_trigger_ops *ops, + struct event_trigger_data *data); +extern void event_enable_trigger_free(struct event_trigger_ops *ops, + struct event_trigger_data *data); +extern int event_enable_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); +extern int event_enable_register_trigger(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file); +extern void event_enable_unregister_trigger(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *test, + struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_ops *ops, struct event_trigger_data *data); @@ -1198,6 +1228,8 @@ extern int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, struct trace_event_file *file); extern int register_event_command(struct event_command *cmd); +extern int unregister_event_command(struct event_command *cmd); +extern int register_trigger_hist_enable_disable_cmds(void); /** * struct event_trigger_ops - callbacks for trace event triggers diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 4f4041d76926..5d4f02792440 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1393,3 +1393,118 @@ __init int register_trigger_hist_cmd(void) return ret; } + +static void +hist_enable_trigger(struct event_trigger_data *data, void *rec) +{ + struct enable_trigger_data *enable_data = data->private_data; + struct event_trigger_data *test; + + list_for_each_entry_rcu(test, &enable_data->file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + if (enable_data->enable) + test->paused = false; + else + test->paused = true; + break; + } + } +} + +static void +hist_enable_count_trigger(struct event_trigger_data *data, void *rec) +{ + if (!data->count) + return; + + if (data->count != -1) + (data->count)--; + + hist_enable_trigger(data, rec); +} + +static struct event_trigger_ops hist_enable_trigger_ops = { + .func = hist_enable_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops hist_enable_count_trigger_ops = { + .func = hist_enable_count_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops hist_disable_trigger_ops = { + .func = hist_enable_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops hist_disable_count_trigger_ops = { + .func = hist_enable_count_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops * +hist_enable_get_trigger_ops(char *cmd, char *param) +{ + struct event_trigger_ops *ops; + bool enable; + + enable = (strcmp(cmd, ENABLE_HIST_STR) == 0); + + if (enable) + ops = param ? &hist_enable_count_trigger_ops : + &hist_enable_trigger_ops; + else + ops = param ? &hist_disable_count_trigger_ops : + &hist_disable_trigger_ops; + + return ops; +} + +static struct event_command trigger_hist_enable_cmd = { + .name = ENABLE_HIST_STR, + .trigger_type = ETT_HIST_ENABLE, + .func = event_enable_trigger_func, + .reg = event_enable_register_trigger, + .unreg = event_enable_unregister_trigger, + .get_trigger_ops = hist_enable_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static struct event_command trigger_hist_disable_cmd = { + .name = DISABLE_HIST_STR, + .trigger_type = ETT_HIST_ENABLE, + .func = event_enable_trigger_func, + .reg = event_enable_register_trigger, + .unreg = event_enable_unregister_trigger, + .get_trigger_ops = hist_enable_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static __init void unregister_trigger_hist_enable_disable_cmds(void) +{ + unregister_event_command(&trigger_hist_enable_cmd); + unregister_event_command(&trigger_hist_disable_cmd); +} + +__init int register_trigger_hist_enable_disable_cmds(void) +{ + int ret; + + ret = register_event_command(&trigger_hist_enable_cmd); + if (WARN_ON(ret < 0)) + return ret; + ret = register_event_command(&trigger_hist_disable_cmd); + if (WARN_ON(ret < 0)) + unregister_trigger_hist_enable_disable_cmds(); + + return ret; +} diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d29092afe005..d133f2094566 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -347,7 +347,7 @@ __init int register_event_command(struct event_command *cmd) * Currently we only unregister event commands from __init, so mark * this __init too. */ -static __init int unregister_event_command(struct event_command *cmd) +__init int unregister_event_command(struct event_command *cmd) { struct event_command *p, *n; int ret = -ENODEV; @@ -1062,15 +1062,6 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void) unregister_event_command(&trigger_traceoff_cmd); } -/* Avoid typos */ -#define ENABLE_EVENT_STR "enable_event" -#define DISABLE_EVENT_STR "disable_event" - -struct enable_trigger_data { - struct trace_event_file *file; - bool enable; -}; - static void event_enable_trigger(struct event_trigger_data *data, void *rec) { @@ -1100,14 +1091,16 @@ event_enable_count_trigger(struct event_trigger_data *data, void *rec) event_enable_trigger(data, rec); } -static int -event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, - struct event_trigger_data *data) +int event_enable_trigger_print(struct seq_file *m, + struct event_trigger_ops *ops, + struct event_trigger_data *data) { struct enable_trigger_data *enable_data = data->private_data; seq_printf(m, "%s:%s:%s", - enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, + enable_data->hist ? + (enable_data->enable ? ENABLE_HIST_STR : DISABLE_HIST_STR) : + (enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR), enable_data->file->event_call->class->system, trace_event_name(enable_data->file->event_call)); @@ -1124,9 +1117,8 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, return 0; } -static void -event_enable_trigger_free(struct event_trigger_ops *ops, - struct event_trigger_data *data) +void event_enable_trigger_free(struct event_trigger_ops *ops, + struct event_trigger_data *data) { struct enable_trigger_data *enable_data = data->private_data; @@ -1171,10 +1163,9 @@ static struct event_trigger_ops event_disable_count_trigger_ops = { .free = event_enable_trigger_free, }; -static int -event_enable_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +int event_enable_trigger_func(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct trace_event_file *event_enable_file; struct enable_trigger_data *enable_data; @@ -1183,6 +1174,7 @@ event_enable_trigger_func(struct event_command *cmd_ops, struct trace_array *tr = file->tr; const char *system; const char *event; + bool hist = false; char *trigger; char *number; bool enable; @@ -1207,8 +1199,15 @@ event_enable_trigger_func(struct event_command *cmd_ops, if (!event_enable_file) goto out; - enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; +#ifdef CONFIG_HIST_TRIGGERS + hist = ((strcmp(cmd, ENABLE_HIST_STR) == 0) || + (strcmp(cmd, DISABLE_HIST_STR) == 0)); + enable = ((strcmp(cmd, ENABLE_EVENT_STR) == 0) || + (strcmp(cmd, ENABLE_HIST_STR) == 0)); +#else + enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; +#endif trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); ret = -ENOMEM; @@ -1228,6 +1227,7 @@ event_enable_trigger_func(struct event_command *cmd_ops, INIT_LIST_HEAD(&trigger_data->list); RCU_INIT_POINTER(trigger_data->filter, NULL); + enable_data->hist = hist; enable_data->enable = enable; enable_data->file = event_enable_file; trigger_data->private_data = enable_data; @@ -1305,10 +1305,10 @@ event_enable_trigger_func(struct event_command *cmd_ops, goto out; } -static int event_enable_register_trigger(char *glob, - struct event_trigger_ops *ops, - struct event_trigger_data *data, - struct trace_event_file *file) +int event_enable_register_trigger(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file) { struct enable_trigger_data *enable_data = data->private_data; struct enable_trigger_data *test_enable_data; @@ -1318,6 +1318,8 @@ static int event_enable_register_trigger(char *glob, list_for_each_entry_rcu(test, &file->triggers, list) { test_enable_data = test->private_data; if (test_enable_data && + (test->cmd_ops->trigger_type == + data->cmd_ops->trigger_type) && (test_enable_data->file == enable_data->file)) { ret = -EEXIST; goto out; @@ -1343,10 +1345,10 @@ out: return ret; } -static void event_enable_unregister_trigger(char *glob, - struct event_trigger_ops *ops, - struct event_trigger_data *test, - struct trace_event_file *file) +void event_enable_unregister_trigger(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *test, + struct trace_event_file *file) { struct enable_trigger_data *test_enable_data = test->private_data; struct enable_trigger_data *enable_data; @@ -1356,6 +1358,8 @@ static void event_enable_unregister_trigger(char *glob, list_for_each_entry_rcu(data, &file->triggers, list) { enable_data = data->private_data; if (enable_data && + (data->cmd_ops->trigger_type == + test->cmd_ops->trigger_type) && (enable_data->file == test_enable_data->file)) { unregistered = true; list_del_rcu(&data->list); @@ -1375,8 +1379,12 @@ event_enable_get_trigger_ops(char *cmd, char *param) struct event_trigger_ops *ops; bool enable; +#ifdef CONFIG_HIST_TRIGGERS + enable = ((strcmp(cmd, ENABLE_EVENT_STR) == 0) || + (strcmp(cmd, ENABLE_HIST_STR) == 0)); +#else enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; - +#endif if (enable) ops = param ? &event_enable_count_trigger_ops : &event_enable_trigger_ops; @@ -1447,6 +1455,7 @@ __init int register_trigger_cmds(void) register_trigger_snapshot_cmd(); register_trigger_stacktrace_cmd(); register_trigger_enable_disable_cmds(); + register_trigger_hist_enable_disable_cmds(); register_trigger_hist_cmd(); return 0; -- cgit v1.2.3 From c1d61c9bb163e696bf06850bcabbd26386554489 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 31 Mar 2016 16:34:32 -0600 Subject: PCI: Reverse standard ACS vs device-specific ACS enabling The original thought was that if a device implemented ACS, then surely we want to use that... well, it turns out that devices can make an ACS capability so broken that we still need to fall back to quirks. Reverse the order of ACS enabling to give quirks first shot at it. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 10 ++++------ drivers/pci/quirks.c | 6 ++++-- include/linux/pci.h | 7 +++++-- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 25e0327d4429..c98c4e2aed3c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2547,7 +2547,7 @@ void pci_request_acs(void) * pci_std_enable_acs - enable ACS on devices using standard ACS capabilites * @dev: the PCI device */ -static int pci_std_enable_acs(struct pci_dev *dev) +static void pci_std_enable_acs(struct pci_dev *dev) { int pos; u16 cap; @@ -2555,7 +2555,7 @@ static int pci_std_enable_acs(struct pci_dev *dev) pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); if (!pos) - return -ENODEV; + return; pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap); pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl); @@ -2573,8 +2573,6 @@ static int pci_std_enable_acs(struct pci_dev *dev) ctrl |= (cap & PCI_ACS_UF); pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); - - return 0; } /** @@ -2586,10 +2584,10 @@ void pci_enable_acs(struct pci_dev *dev) if (!pci_acs_enable) return; - if (!pci_std_enable_acs(dev)) + if (!pci_dev_specific_enable_acs(dev)) return; - pci_dev_specific_enable_acs(dev); + pci_std_enable_acs(dev); } static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e248c2aad000..1f5c7898a246 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4201,7 +4201,7 @@ static const struct pci_dev_enable_acs { { 0 } }; -void pci_dev_specific_enable_acs(struct pci_dev *dev) +int pci_dev_specific_enable_acs(struct pci_dev *dev) { const struct pci_dev_enable_acs *i; int ret; @@ -4213,9 +4213,11 @@ void pci_dev_specific_enable_acs(struct pci_dev *dev) i->device == (u16)PCI_ANY_ID)) { ret = i->enable_acs(dev); if (ret >= 0) - return; + return ret; } } + + return -ENOTTY; } /* diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..aaec79aee805 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1663,7 +1663,7 @@ enum pci_fixup_pass { #ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); -void pci_dev_specific_enable_acs(struct pci_dev *dev); +int pci_dev_specific_enable_acs(struct pci_dev *dev); #else static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } @@ -1672,7 +1672,10 @@ static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, { return -ENOTTY; } -static inline void pci_dev_specific_enable_acs(struct pci_dev *dev) { } +static inline int pci_dev_specific_enable_acs(struct pci_dev *dev) +{ + return -ENOTTY; +} #endif void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); -- cgit v1.2.3 From a14b9e0512404ed7d4415b888dc9f1f9785a4fa3 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 5 Feb 2016 16:40:47 -0800 Subject: clkdev: Remove clk_register_clkdevs() Now that we've converted the only caller over to another clkdev API, remove this one. Reviewed-by: Andy Shevchenko Cc: Russell King Signed-off-by: Stephen Boyd --- drivers/clk/clkdev.c | 27 --------------------------- include/linux/clkdev.h | 1 - 2 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index eb20b941154b..ae8e40a82d34 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -402,30 +402,3 @@ int clk_register_clkdev(struct clk *clk, const char *con_id, return cl ? 0 : -ENOMEM; } EXPORT_SYMBOL(clk_register_clkdev); - -/** - * clk_register_clkdevs - register a set of clk_lookup for a struct clk - * @clk: struct clk to associate with all clk_lookups - * @cl: array of clk_lookup structures with con_id and dev_id pre-initialized - * @num: number of clk_lookup structures to register - * - * To make things easier for mass registration, we detect error clks - * from a previous clk_register() call, and return the error code for - * those. This is to permit this function to be called immediately - * after clk_register(). - */ -int clk_register_clkdevs(struct clk *clk, struct clk_lookup *cl, size_t num) -{ - unsigned i; - - if (IS_ERR(clk)) - return PTR_ERR(clk); - - for (i = 0; i < num; i++, cl++) { - cl->clk_hw = __clk_get_hw(clk); - __clkdev_add(cl); - } - - return 0; -} -EXPORT_SYMBOL(clk_register_clkdevs); diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index c2c04f7cbe8a..e6f8eb1d585f 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -45,7 +45,6 @@ void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *); -int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t); #ifdef CONFIG_COMMON_CLK int __clk_get(struct clk *clk); -- cgit v1.2.3 From 4143804c4fdef40358c654d1fb2271a1a0f1fedf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 5 Feb 2016 17:02:52 -0800 Subject: clk: Add {devm_}clk_hw_{register,unregister}() APIs We've largely split the clk consumer and provider APIs along struct clk and struct clk_hw, but clk_register() still returns a struct clk pointer for each struct clk_hw that's registered. Eventually we'd like to only allocate struct clks when there's a user, because struct clk is per-user now, so clk_register() needs to change. Let's add new APIs to register struct clk_hws, but this time we'll hide the struct clk from the caller by returning an int error code. Also add an unregistration API that takes the clk_hw structure that was passed to the registration API. This way provider drivers never have to deal with a struct clk pointer unless they're using the clk consumer APIs. Signed-off-by: Stephen Boyd --- Documentation/driver-model/devres.txt | 1 + drivers/clk/clk.c | 86 +++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 6 +++ 3 files changed, 93 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 73b98dfbcea4..108d45553e1b 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -236,6 +236,7 @@ certainly invest a bit more effort into libata core layer). CLOCK devm_clk_get() devm_clk_put() + devm_clk_hw_register() DMA dmam_alloc_coherent() diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index fb74dc1f7520..0ef919666827 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2536,6 +2536,22 @@ fail_out: } EXPORT_SYMBOL_GPL(clk_register); +/** + * clk_hw_register - register a clk_hw and return an error code + * @dev: device that is registering this clock + * @hw: link to hardware-specific clock data + * + * clk_hw_register is the primary interface for populating the clock tree with + * new clock nodes. It returns an integer equal to zero indicating success or + * less than zero indicating failure. Drivers must test for an error code after + * calling clk_hw_register(). + */ +int clk_hw_register(struct device *dev, struct clk_hw *hw) +{ + return PTR_ERR_OR_ZERO(clk_register(dev, hw)); +} +EXPORT_SYMBOL_GPL(clk_hw_register); + /* Free memory allocated for a clock. */ static void __clk_release(struct kref *ref) { @@ -2637,11 +2653,26 @@ unlock: } EXPORT_SYMBOL_GPL(clk_unregister); +/** + * clk_hw_unregister - unregister a currently registered clk_hw + * @hw: hardware-specific clock data to unregister + */ +void clk_hw_unregister(struct clk_hw *hw) +{ + clk_unregister(hw->clk); +} +EXPORT_SYMBOL_GPL(clk_hw_unregister); + static void devm_clk_release(struct device *dev, void *res) { clk_unregister(*(struct clk **)res); } +static void devm_clk_hw_release(struct device *dev, void *res) +{ + clk_hw_unregister(*(struct clk_hw **)res); +} + /** * devm_clk_register - resource managed clk_register() * @dev: device that is registering this clock @@ -2672,6 +2703,36 @@ struct clk *devm_clk_register(struct device *dev, struct clk_hw *hw) } EXPORT_SYMBOL_GPL(devm_clk_register); +/** + * devm_clk_hw_register - resource managed clk_hw_register() + * @dev: device that is registering this clock + * @hw: link to hardware-specific clock data + * + * Managed clk_hw_register(). Clocks returned from this function are + * automatically clk_hw_unregister()ed on driver detach. See clk_hw_register() + * for more information. + */ +int devm_clk_hw_register(struct device *dev, struct clk_hw *hw) +{ + struct clk_hw **hwp; + int ret; + + hwp = devres_alloc(devm_clk_hw_release, sizeof(*hwp), GFP_KERNEL); + if (!hwp) + return -ENOMEM; + + ret = clk_hw_register(dev, hw); + if (!ret) { + *hwp = hw; + devres_add(dev, hwp); + } else { + devres_free(hwp); + } + + return ret; +} +EXPORT_SYMBOL_GPL(devm_clk_hw_register); + static int devm_clk_match(struct device *dev, void *res, void *data) { struct clk *c = res; @@ -2680,6 +2741,15 @@ static int devm_clk_match(struct device *dev, void *res, void *data) return c == data; } +static int devm_clk_hw_match(struct device *dev, void *res, void *data) +{ + struct clk_hw *hw = res; + + if (WARN_ON(!hw)) + return 0; + return hw == data; +} + /** * devm_clk_unregister - resource managed clk_unregister() * @clk: clock to unregister @@ -2694,6 +2764,22 @@ void devm_clk_unregister(struct device *dev, struct clk *clk) } EXPORT_SYMBOL_GPL(devm_clk_unregister); +/** + * devm_clk_hw_unregister - resource managed clk_hw_unregister() + * @dev: device that is unregistering the hardware-specific clock data + * @hw: link to hardware-specific clock data + * + * Unregister a clk_hw registered with devm_clk_hw_register(). Normally + * this function will not need to be called and the resource management + * code will ensure that the resource is freed. + */ +void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw) +{ + WARN_ON(devres_release(dev, devm_clk_hw_release, devm_clk_hw_match, + hw)); +} +EXPORT_SYMBOL_GPL(devm_clk_hw_unregister); + /* * clkdev helpers */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index da95258127aa..bc6c8de1fac1 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -655,9 +655,15 @@ struct clk *clk_register_gpio_mux(struct device *dev, const char *name, struct clk *clk_register(struct device *dev, struct clk_hw *hw); struct clk *devm_clk_register(struct device *dev, struct clk_hw *hw); +int __must_check clk_hw_register(struct device *dev, struct clk_hw *hw); +int __must_check devm_clk_hw_register(struct device *dev, struct clk_hw *hw); + void clk_unregister(struct clk *clk); void devm_clk_unregister(struct device *dev, struct clk *clk); +void clk_hw_unregister(struct clk_hw *hw); +void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw); + /* helper functions */ const char *__clk_get_name(const struct clk *clk); const char *clk_hw_get_name(const struct clk_hw *hw); -- cgit v1.2.3 From 0861e5b8cf80038e91942f1005c8dfce79d18c38 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 5 Feb 2016 17:38:26 -0800 Subject: clk: Add clk_hw OF clk providers Now that we have a clk registration API that doesn't return struct clks, we need to have some way to hand out struct clks via the clk_get() APIs that doesn't involve associating struct clk pointers with an OF node. Currently we ask the OF provider to give us a struct clk pointer for some clkspec, turn that struct clk into a struct clk_hw and then allocate a new struct clk to return to the caller. Let's add a clk_hw based OF provider hook that returns a struct clk_hw directly, so that we skip the intermediate step of converting from struct clk to struct clk_hw. Eventually when we've converted all OF clk providers to struct clk_hw based APIs we can remove the struct clk based ones. It should also be noted that we change the onecell provider to have a flex array instead of a pointer for the array of clk_hw pointers. This allows providers to allocate one structure of the correct length in one step instead of two. Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 85 +++++++++++++++++++++++++++++++++++++++++--- include/linux/clk-provider.h | 30 ++++++++++++++++ 2 files changed, 111 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 0ef919666827..e813b2aabc87 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2941,6 +2941,7 @@ struct of_clk_provider { struct device_node *node; struct clk *(*get)(struct of_phandle_args *clkspec, void *data); + struct clk_hw *(*get_hw)(struct of_phandle_args *clkspec, void *data); void *data; }; @@ -2957,6 +2958,12 @@ struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, } EXPORT_SYMBOL_GPL(of_clk_src_simple_get); +struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, void *data) +{ + return data; +} +EXPORT_SYMBOL_GPL(of_clk_hw_simple_get); + struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data) { struct clk_onecell_data *clk_data = data; @@ -2971,6 +2978,21 @@ struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data) } EXPORT_SYMBOL_GPL(of_clk_src_onecell_get); +struct clk_hw * +of_clk_hw_onecell_get(struct of_phandle_args *clkspec, void *data) +{ + struct clk_hw_onecell_data *hw_data = data; + unsigned int idx = clkspec->args[0]; + + if (idx >= hw_data->num) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return hw_data->hws[idx]; +} +EXPORT_SYMBOL_GPL(of_clk_hw_onecell_get); + /** * of_clk_add_provider() - Register a clock provider for a node * @np: Device node pointer associated with clock provider @@ -3006,6 +3028,41 @@ int of_clk_add_provider(struct device_node *np, } EXPORT_SYMBOL_GPL(of_clk_add_provider); +/** + * of_clk_add_hw_provider() - Register a clock provider for a node + * @np: Device node pointer associated with clock provider + * @get: callback for decoding clk_hw + * @data: context pointer for @get callback. + */ +int of_clk_add_hw_provider(struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data) +{ + struct of_clk_provider *cp; + int ret; + + cp = kzalloc(sizeof(*cp), GFP_KERNEL); + if (!cp) + return -ENOMEM; + + cp->node = of_node_get(np); + cp->data = data; + cp->get_hw = get; + + mutex_lock(&of_clk_mutex); + list_add(&cp->link, &of_clk_providers); + mutex_unlock(&of_clk_mutex); + pr_debug("Added clk_hw provider from %s\n", np->full_name); + + ret = of_clk_set_defaults(np, true); + if (ret < 0) + of_clk_del_provider(np); + + return ret; +} +EXPORT_SYMBOL_GPL(of_clk_add_hw_provider); + /** * of_clk_del_provider() - Remove a previously registered clock provider * @np: Device node pointer associated with clock provider @@ -3027,11 +3084,32 @@ void of_clk_del_provider(struct device_node *np) } EXPORT_SYMBOL_GPL(of_clk_del_provider); +static struct clk_hw * +__of_clk_get_hw_from_provider(struct of_clk_provider *provider, + struct of_phandle_args *clkspec) +{ + struct clk *clk; + struct clk_hw *hw = ERR_PTR(-EPROBE_DEFER); + + if (provider->get_hw) { + hw = provider->get_hw(clkspec, provider->data); + } else if (provider->get) { + clk = provider->get(clkspec, provider->data); + if (!IS_ERR(clk)) + hw = __clk_get_hw(clk); + else + hw = ERR_CAST(clk); + } + + return hw; +} + struct clk *__of_clk_get_from_provider(struct of_phandle_args *clkspec, const char *dev_id, const char *con_id) { struct of_clk_provider *provider; struct clk *clk = ERR_PTR(-EPROBE_DEFER); + struct clk_hw *hw = ERR_PTR(-EPROBE_DEFER); if (!clkspec) return ERR_PTR(-EINVAL); @@ -3040,10 +3118,9 @@ struct clk *__of_clk_get_from_provider(struct of_phandle_args *clkspec, mutex_lock(&of_clk_mutex); list_for_each_entry(provider, &of_clk_providers, link) { if (provider->node == clkspec->np) - clk = provider->get(clkspec, provider->data); - if (!IS_ERR(clk)) { - clk = __clk_create_clk(__clk_get_hw(clk), dev_id, - con_id); + hw = __of_clk_get_hw_from_provider(provider, clkspec); + if (!IS_ERR(hw)) { + clk = __clk_create_clk(hw, dev_id, con_id); if (!IS_ERR(clk) && !__clk_get(clk)) { __clk_free_clk(clk); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bc6c8de1fac1..bf8c8bb8c2cb 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -709,6 +709,11 @@ struct clk_onecell_data { unsigned int clk_num; }; +struct clk_hw_onecell_data { + size_t num; + struct clk_hw *hws[]; +}; + extern struct of_device_id __clk_of_table; #define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn) @@ -718,10 +723,18 @@ int of_clk_add_provider(struct device_node *np, struct clk *(*clk_src_get)(struct of_phandle_args *args, void *data), void *data); +int of_clk_add_hw_provider(struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data); void of_clk_del_provider(struct device_node *np); struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); +struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, + void *data); struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data); +struct clk_hw *of_clk_hw_onecell_get(struct of_phandle_args *clkspec, + void *data); unsigned int of_clk_get_parent_count(struct device_node *np); int of_clk_parent_fill(struct device_node *np, const char **parents, unsigned int size); @@ -738,17 +751,34 @@ static inline int of_clk_add_provider(struct device_node *np, { return 0; } +static inline int of_clk_add_hw_provider(struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data) +{ + return 0; +} static inline void of_clk_del_provider(struct device_node *np) {} static inline struct clk *of_clk_src_simple_get( struct of_phandle_args *clkspec, void *data) { return ERR_PTR(-ENOENT); } +static inline struct clk_hw * +of_clk_hw_simple_get(struct of_phandle_args *clkspec, void *data) +{ + return ERR_PTR(-ENOENT); +} static inline struct clk *of_clk_src_onecell_get( struct of_phandle_args *clkspec, void *data) { return ERR_PTR(-ENOENT); } +static inline struct clk_hw * +of_clk_hw_onecell_get(struct of_phandle_args *clkspec, void *data) +{ + return ERR_PTR(-ENOENT); +} static inline int of_clk_get_parent_count(struct device_node *np) { return 0; -- cgit v1.2.3 From e4f1b49bda6d6aa2e13730ff7eeccbe65a6271f1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 8 Feb 2016 14:59:49 -0800 Subject: clkdev: Add clk_hw based registration APIs Now that we have a clk registration API that doesn't return struct clks, we need to have some way to hand out struct clks via the clk_get() APIs that doesn't involve associating struct clk pointers with a struct clk_lookup. Luckily, clkdev already operates on struct clk_hw pointers, except for the registration facing APIs where it converts struct clk pointers into struct clk_hw pointers almost immediately. Let's add clk_hw based registration APIs so that we can skip the conversion step and provide a way for clk provider drivers to operate exclusively on clk_hw structs. This way we clearly split the API between consumers and providers. Cc: Russell King Signed-off-by: Stephen Boyd --- drivers/clk/clkdev.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/clkdev.h | 6 +++++ 2 files changed, 70 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index ae8e40a82d34..89cc700fbc37 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -301,6 +301,20 @@ clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) } EXPORT_SYMBOL(clkdev_alloc); +struct clk_lookup * +clkdev_hw_alloc(struct clk_hw *hw, const char *con_id, const char *dev_fmt, ...) +{ + struct clk_lookup *cl; + va_list ap; + + va_start(ap, dev_fmt); + cl = vclkdev_alloc(hw, con_id, dev_fmt, ap); + va_end(ap); + + return cl; +} +EXPORT_SYMBOL(clkdev_hw_alloc); + /** * clkdev_create - allocate and add a clkdev lookup structure * @clk: struct clk to associate with all clk_lookups @@ -324,6 +338,29 @@ struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, } EXPORT_SYMBOL_GPL(clkdev_create); +/** + * clkdev_hw_create - allocate and add a clkdev lookup structure + * @hw: struct clk_hw to associate with all clk_lookups + * @con_id: connection ID string on device + * @dev_fmt: format string describing device name + * + * Returns a clk_lookup structure, which can be later unregistered and + * freed. + */ +struct clk_lookup *clkdev_hw_create(struct clk_hw *hw, const char *con_id, + const char *dev_fmt, ...) +{ + struct clk_lookup *cl; + va_list ap; + + va_start(ap, dev_fmt); + cl = vclkdev_create(hw, con_id, dev_fmt, ap); + va_end(ap); + + return cl; +} +EXPORT_SYMBOL_GPL(clkdev_hw_create); + int clk_add_alias(const char *alias, const char *alias_dev_name, const char *con_id, struct device *dev) { @@ -402,3 +439,30 @@ int clk_register_clkdev(struct clk *clk, const char *con_id, return cl ? 0 : -ENOMEM; } EXPORT_SYMBOL(clk_register_clkdev); + +/** + * clk_hw_register_clkdev - register one clock lookup for a struct clk_hw + * @hw: struct clk_hw to associate with all clk_lookups + * @con_id: connection ID string on device + * @dev_id: format string describing device name + * + * con_id or dev_id may be NULL as a wildcard, just as in the rest of + * clkdev. + */ +int clk_hw_register_clkdev(struct clk_hw *hw, const char *con_id, + const char *dev_id) +{ + struct clk_lookup *cl; + + /* + * Since dev_id can be NULL, and NULL is handled specially, we must + * pass it as either a NULL format string, or with "%s". + */ + if (dev_id) + cl = __clk_register_clkdev(hw, con_id, "%s", dev_id); + else + cl = __clk_register_clkdev(hw, con_id, NULL); + + return cl ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(clk_hw_register_clkdev); diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index e6f8eb1d585f..2eabc862abdb 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -15,6 +15,7 @@ #include struct clk; +struct clk_hw; struct device; struct clk_lookup { @@ -34,17 +35,22 @@ struct clk_lookup { struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) __printf(3, 4); +struct clk_lookup *clkdev_hw_alloc(struct clk_hw *hw, const char *con_id, + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add(struct clk_lookup *cl); void clkdev_drop(struct clk_lookup *cl); struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, const char *dev_fmt, ...) __printf(3, 4); +struct clk_lookup *clkdev_hw_create(struct clk_hw *hw, const char *con_id, + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *); +int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *); #ifdef CONFIG_COMMON_CLK int __clk_get(struct clk *clk); -- cgit v1.2.3 From eb7d264f3bf9ca7c093efb77bdde557c6c6e826f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 6 Feb 2016 23:26:37 -0800 Subject: clk: divider: Add hw based registration APIs Add registration APIs in the clk divider code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-divider.c | 91 ++++++++++++++++++++++++++++++++++++++++---- include/linux/clk-provider.h | 10 +++++ 2 files changed, 93 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 00e035b51c69..a0f55bc1ad3d 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -426,15 +426,16 @@ const struct clk_ops clk_divider_ro_ops = { }; EXPORT_SYMBOL_GPL(clk_divider_ro_ops); -static struct clk *_register_divider(struct device *dev, const char *name, +static struct clk_hw *_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock) { struct clk_divider *div; - struct clk *clk; + struct clk_hw *hw; struct clk_init_data init; + int ret; if (clk_divider_flags & CLK_DIVIDER_HIWORD_MASK) { if (width + shift > 16) { @@ -467,12 +468,14 @@ static struct clk *_register_divider(struct device *dev, const char *name, div->table = table; /* register the clock */ - clk = clk_register(dev, &div->hw); - - if (IS_ERR(clk)) + hw = &div->hw; + ret = clk_hw_register(dev, hw); + if (ret) { kfree(div); + hw = ERR_PTR(ret); + } - return clk; + return hw; } /** @@ -492,11 +495,38 @@ struct clk *clk_register_divider(struct device *dev, const char *name, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, spinlock_t *lock) { - return _register_divider(dev, name, parent_name, flags, reg, shift, + struct clk_hw *hw; + + hw = _register_divider(dev, name, parent_name, flags, reg, shift, width, clk_divider_flags, NULL, lock); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_divider); +/** + * clk_hw_register_divider - register a divider clock with the clock framework + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @lock: shared register lock for this clock + */ +struct clk_hw *clk_hw_register_divider(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_divider_flags, spinlock_t *lock) +{ + return _register_divider(dev, name, parent_name, flags, reg, shift, + width, clk_divider_flags, NULL, lock); +} +EXPORT_SYMBOL_GPL(clk_hw_register_divider); + /** * clk_register_divider_table - register a table based divider clock with * the clock framework @@ -517,11 +547,41 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock) { - return _register_divider(dev, name, parent_name, flags, reg, shift, + struct clk_hw *hw; + + hw = _register_divider(dev, name, parent_name, flags, reg, shift, width, clk_divider_flags, table, lock); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_divider_table); +/** + * clk_hw_register_divider_table - register a table based divider clock with + * the clock framework + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @table: array of divider/value pairs ending with a div set to 0 + * @lock: shared register lock for this clock + */ +struct clk_hw *clk_hw_register_divider_table(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_divider_flags, const struct clk_div_table *table, + spinlock_t *lock) +{ + return _register_divider(dev, name, parent_name, flags, reg, shift, + width, clk_divider_flags, table, lock); +} +EXPORT_SYMBOL_GPL(clk_hw_register_divider_table); + void clk_unregister_divider(struct clk *clk) { struct clk_divider *div; @@ -537,3 +597,18 @@ void clk_unregister_divider(struct clk *clk) kfree(div); } EXPORT_SYMBOL_GPL(clk_unregister_divider); + +/** + * clk_hw_unregister_divider - unregister a clk divider + * @hw: hardware-specific clock data to unregister + */ +void clk_hw_unregister_divider(struct clk_hw *hw) +{ + struct clk_divider *div; + + div = to_clk_divider(hw); + + clk_hw_unregister(hw); + kfree(div); +} +EXPORT_SYMBOL_GPL(clk_hw_unregister_divider); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bf8c8bb8c2cb..8885d0350596 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -407,12 +407,22 @@ struct clk *clk_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_divider(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_divider_flags, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +struct clk_hw *clk_hw_register_divider_table(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_divider_flags, const struct clk_div_table *table, + spinlock_t *lock); void clk_unregister_divider(struct clk *clk); +void clk_hw_unregister_divider(struct clk_hw *hw); /** * struct clk_mux - multiplexer clock -- cgit v1.2.3 From e270d8cb13763f58107198e879cf396511ba2867 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 6 Feb 2016 23:54:45 -0800 Subject: clk: gate: Add hw based registration APIs Add registration APIs in the clk gate code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-gate.c | 43 ++++++++++++++++++++++++++++++++++++------- include/linux/clk-provider.h | 5 +++++ 2 files changed, 41 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index d0d8ec8e1f1b..4e691e35483a 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -110,7 +110,7 @@ const struct clk_ops clk_gate_ops = { EXPORT_SYMBOL_GPL(clk_gate_ops); /** - * clk_register_gate - register a gate clock with the clock framework + * clk_hw_register_gate - register a gate clock with the clock framework * @dev: device that is registering this clock * @name: name of this clock * @parent_name: name of this clock's parent @@ -120,14 +120,15 @@ EXPORT_SYMBOL_GPL(clk_gate_ops); * @clk_gate_flags: gate-specific flags for this clock * @lock: shared register lock for this clock */ -struct clk *clk_register_gate(struct device *dev, const char *name, +struct clk_hw *clk_hw_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, u8 clk_gate_flags, spinlock_t *lock) { struct clk_gate *gate; - struct clk *clk; + struct clk_hw *hw; struct clk_init_data init; + int ret; if (clk_gate_flags & CLK_GATE_HIWORD_MASK) { if (bit_idx > 15) { @@ -154,12 +155,29 @@ struct clk *clk_register_gate(struct device *dev, const char *name, gate->lock = lock; gate->hw.init = &init; - clk = clk_register(dev, &gate->hw); - - if (IS_ERR(clk)) + hw = &gate->hw; + ret = clk_hw_register(dev, hw); + if (ret) { kfree(gate); + hw = ERR_PTR(ret); + } - return clk; + return hw; +} +EXPORT_SYMBOL_GPL(clk_hw_register_gate); + +struct clk *clk_register_gate(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate_flags, spinlock_t *lock) +{ + struct clk_hw *hw; + + hw = clk_hw_register_gate(dev, name, parent_name, flags, reg, + bit_idx, clk_gate_flags, lock); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_gate); @@ -178,3 +196,14 @@ void clk_unregister_gate(struct clk *clk) kfree(gate); } EXPORT_SYMBOL_GPL(clk_unregister_gate); + +void clk_hw_unregister_gate(struct clk_hw *hw) +{ + struct clk_gate *gate; + + gate = to_clk_gate(hw); + + clk_hw_unregister(hw); + kfree(gate); +} +EXPORT_SYMBOL_GPL(clk_hw_unregister_gate); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 8885d0350596..bf12050aadd5 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -326,7 +326,12 @@ struct clk *clk_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, u8 clk_gate_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_gate(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate_flags, spinlock_t *lock); void clk_unregister_gate(struct clk *clk); +void clk_hw_unregister_gate(struct clk_hw *hw); struct clk_div_table { unsigned int val; -- cgit v1.2.3 From 264b31719735eb1fcbed47cecdb20f517e804856 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:05:48 -0800 Subject: clk: mux: Add hw based registration APIs Add registration APIs in the clk mux code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-mux.c | 57 +++++++++++++++++++++++++++++++++++++++----- include/linux/clk-provider.h | 11 +++++++++ 2 files changed, 62 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 252188fd8bcd..16a3d5717f4e 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -113,16 +113,17 @@ const struct clk_ops clk_mux_ro_ops = { }; EXPORT_SYMBOL_GPL(clk_mux_ro_ops); -struct clk *clk_register_mux_table(struct device *dev, const char *name, +struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, u8 clk_mux_flags, u32 *table, spinlock_t *lock) { struct clk_mux *mux; - struct clk *clk; + struct clk_hw *hw; struct clk_init_data init; u8 width = 0; + int ret; if (clk_mux_flags & CLK_MUX_HIWORD_MASK) { width = fls(mask) - ffs(mask) + 1; @@ -157,12 +158,31 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, mux->table = table; mux->hw.init = &init; - clk = clk_register(dev, &mux->hw); - - if (IS_ERR(clk)) + hw = &mux->hw; + ret = clk_hw_register(dev, hw); + if (ret) { kfree(mux); + hw = ERR_PTR(ret); + } - return clk; + return hw; +} +EXPORT_SYMBOL_GPL(clk_hw_register_mux_table); + +struct clk *clk_register_mux_table(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, + unsigned long flags, + void __iomem *reg, u8 shift, u32 mask, + u8 clk_mux_flags, u32 *table, spinlock_t *lock) +{ + struct clk_hw *hw; + + hw = clk_hw_register_mux_table(dev, name, parent_names, num_parents, + flags, reg, shift, mask, clk_mux_flags, + table, lock); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_mux_table); @@ -180,6 +200,20 @@ struct clk *clk_register_mux(struct device *dev, const char *name, } EXPORT_SYMBOL_GPL(clk_register_mux); +struct clk_hw *clk_hw_register_mux(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, + unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mux_flags, spinlock_t *lock) +{ + u32 mask = BIT(width) - 1; + + return clk_hw_register_mux_table(dev, name, parent_names, num_parents, + flags, reg, shift, mask, clk_mux_flags, + NULL, lock); +} +EXPORT_SYMBOL_GPL(clk_hw_register_mux); + void clk_unregister_mux(struct clk *clk) { struct clk_mux *mux; @@ -195,3 +229,14 @@ void clk_unregister_mux(struct clk *clk) kfree(mux); } EXPORT_SYMBOL_GPL(clk_unregister_mux); + +void clk_hw_unregister_mux(struct clk_hw *hw) +{ + struct clk_mux *mux; + + mux = to_clk_mux(hw); + + clk_hw_unregister(hw); + kfree(mux); +} +EXPORT_SYMBOL_GPL(clk_hw_unregister_mux); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bf12050aadd5..d690d99b9c1c 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -478,14 +478,25 @@ struct clk *clk_register_mux(struct device *dev, const char *name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_mux_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_mux(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, + unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mux_flags, spinlock_t *lock); struct clk *clk_register_mux_table(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, u8 clk_mux_flags, u32 *table, spinlock_t *lock); +struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, + unsigned long flags, + void __iomem *reg, u8 shift, u32 mask, + u8 clk_mux_flags, u32 *table, spinlock_t *lock); void clk_unregister_mux(struct clk *clk); +void clk_hw_unregister_mux(struct clk_hw *hw); void of_fixed_factor_clk_setup(struct device_node *node); -- cgit v1.2.3 From 0759ac8a73dc2c8cc8ac697fbe5dbd8d67348d37 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:11:06 -0800 Subject: clk: fixed-factor: Add hw based registration APIs Add registration APIs in the clk fixed-factor code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-fixed-factor.c | 42 +++++++++++++++++++++++++++++++++++------- include/linux/clk-provider.h | 4 ++++ 2 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index 053448e2453d..75cd6c792cb8 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -68,13 +68,14 @@ const struct clk_ops clk_fixed_factor_ops = { }; EXPORT_SYMBOL_GPL(clk_fixed_factor_ops); -struct clk *clk_register_fixed_factor(struct device *dev, const char *name, - const char *parent_name, unsigned long flags, +struct clk_hw *clk_hw_register_fixed_factor(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div) { struct clk_fixed_factor *fix; struct clk_init_data init; - struct clk *clk; + struct clk_hw *hw; + int ret; fix = kmalloc(sizeof(*fix), GFP_KERNEL); if (!fix) @@ -91,12 +92,28 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, init.parent_names = &parent_name; init.num_parents = 1; - clk = clk_register(dev, &fix->hw); - - if (IS_ERR(clk)) + hw = &fix->hw; + ret = clk_hw_register(dev, hw); + if (ret) { kfree(fix); + hw = ERR_PTR(ret); + } + + return hw; +} +EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor); + +struct clk *clk_register_fixed_factor(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + unsigned int mult, unsigned int div) +{ + struct clk_hw *hw; - return clk; + hw = clk_hw_register_fixed_factor(dev, name, parent_name, flags, mult, + div); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_fixed_factor); @@ -113,6 +130,17 @@ void clk_unregister_fixed_factor(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_unregister_fixed_factor); +void clk_hw_unregister_fixed_factor(struct clk_hw *hw) +{ + struct clk_fixed_factor *fix; + + fix = to_clk_fixed_factor(hw); + + clk_hw_unregister(hw); + kfree(fix); +} +EXPORT_SYMBOL_GPL(clk_hw_unregister_fixed_factor); + #ifdef CONFIG_OF /** * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index d690d99b9c1c..79ad1a8a6831 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -525,6 +525,10 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); void clk_unregister_fixed_factor(struct clk *clk); +struct clk_hw *clk_hw_register_fixed_factor(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + unsigned int mult, unsigned int div); +void clk_hw_unregister_fixed_factor(struct clk_hw *hw); /** * struct clk_fractional_divider - adjustable fractional divider clock -- cgit v1.2.3 From 39b44cff4ad4af6d7abd9dd2acb288b005c26503 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:15:09 -0800 Subject: clk: fractional-divider: Add hw based registration APIs Add registration APIs in the clk fractional divider code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-fractional-divider.c | 40 +++++++++++++++++++++++++++++++----- include/linux/clk-provider.h | 5 +++++ 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c index 1abcd76b4993..aab904618eb6 100644 --- a/drivers/clk/clk-fractional-divider.c +++ b/drivers/clk/clk-fractional-divider.c @@ -116,14 +116,15 @@ const struct clk_ops clk_fractional_divider_ops = { }; EXPORT_SYMBOL_GPL(clk_fractional_divider_ops); -struct clk *clk_register_fractional_divider(struct device *dev, +struct clk_hw *clk_hw_register_fractional_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, u8 clk_divider_flags, spinlock_t *lock) { struct clk_fractional_divider *fd; struct clk_init_data init; - struct clk *clk; + struct clk_hw *hw; + int ret; fd = kzalloc(sizeof(*fd), GFP_KERNEL); if (!fd) @@ -146,10 +147,39 @@ struct clk *clk_register_fractional_divider(struct device *dev, fd->lock = lock; fd->hw.init = &init; - clk = clk_register(dev, &fd->hw); - if (IS_ERR(clk)) + hw = &fd->hw; + ret = clk_hw_register(dev, hw); + if (ret) { kfree(fd); + hw = ERR_PTR(ret); + } + + return hw; +} +EXPORT_SYMBOL_GPL(clk_hw_register_fractional_divider); - return clk; +struct clk *clk_register_fractional_divider(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, + u8 clk_divider_flags, spinlock_t *lock) +{ + struct clk_hw *hw; + + hw = clk_hw_register_fractional_divider(dev, name, parent_name, flags, + reg, mshift, mwidth, nshift, nwidth, clk_divider_flags, + lock); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_fractional_divider); + +void clk_hw_unregister_fractional_divider(struct clk_hw *hw) +{ + struct clk_fractional_divider *fd; + + fd = to_clk_fd(hw); + + clk_hw_unregister(hw); + kfree(fd); +} diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 79ad1a8a6831..bcbaf6c95d52 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -563,6 +563,11 @@ struct clk *clk_register_fractional_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, u8 clk_divider_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_fractional_divider(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, + u8 clk_divider_flags, spinlock_t *lock); +void clk_hw_unregister_fractional_divider(struct clk_hw *hw); /** * struct clk_multiplier - adjustable multiplier clock -- cgit v1.2.3 From 49cb392d36397a296dcd51ec57cf83585a89a94a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:20:31 -0800 Subject: clk: composite: Add hw based registration APIs Add registration APIs in the clk composite code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 45 ++++++++++++++++++++++++++++++++------------ include/linux/clk-provider.h | 7 +++++++ 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 1f903e1f86a2..463fadd5a68f 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -184,17 +184,18 @@ static void clk_composite_disable(struct clk_hw *hw) gate_ops->disable(gate_hw); } -struct clk *clk_register_composite(struct device *dev, const char *name, +struct clk_hw *clk_hw_register_composite(struct device *dev, const char *name, const char * const *parent_names, int num_parents, struct clk_hw *mux_hw, const struct clk_ops *mux_ops, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags) { - struct clk *clk; + struct clk_hw *hw; struct clk_init_data init; struct clk_composite *composite; struct clk_ops *clk_composite_ops; + int ret; composite = kzalloc(sizeof(*composite), GFP_KERNEL); if (!composite) @@ -204,12 +205,13 @@ struct clk *clk_register_composite(struct device *dev, const char *name, init.flags = flags | CLK_IS_BASIC; init.parent_names = parent_names; init.num_parents = num_parents; + hw = &composite->hw; clk_composite_ops = &composite->ops; if (mux_hw && mux_ops) { if (!mux_ops->get_parent) { - clk = ERR_PTR(-EINVAL); + hw = ERR_PTR(-EINVAL); goto err; } @@ -224,7 +226,7 @@ struct clk *clk_register_composite(struct device *dev, const char *name, if (rate_hw && rate_ops) { if (!rate_ops->recalc_rate) { - clk = ERR_PTR(-EINVAL); + hw = ERR_PTR(-EINVAL); goto err; } clk_composite_ops->recalc_rate = clk_composite_recalc_rate; @@ -253,7 +255,7 @@ struct clk *clk_register_composite(struct device *dev, const char *name, if (gate_hw && gate_ops) { if (!gate_ops->is_enabled || !gate_ops->enable || !gate_ops->disable) { - clk = ERR_PTR(-EINVAL); + hw = ERR_PTR(-EINVAL); goto err; } @@ -267,22 +269,41 @@ struct clk *clk_register_composite(struct device *dev, const char *name, init.ops = clk_composite_ops; composite->hw.init = &init; - clk = clk_register(dev, &composite->hw); - if (IS_ERR(clk)) + ret = clk_hw_register(dev, hw); + if (ret) { + hw = ERR_PTR(ret); goto err; + } if (composite->mux_hw) - composite->mux_hw->clk = clk; + composite->mux_hw->clk = hw->clk; if (composite->rate_hw) - composite->rate_hw->clk = clk; + composite->rate_hw->clk = hw->clk; if (composite->gate_hw) - composite->gate_hw->clk = clk; + composite->gate_hw->clk = hw->clk; - return clk; + return hw; err: kfree(composite); - return clk; + return hw; +} + +struct clk *clk_register_composite(struct device *dev, const char *name, + const char * const *parent_names, int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags) +{ + struct clk_hw *hw; + + hw = clk_hw_register_composite(dev, name, parent_names, num_parents, + mux_hw, mux_ops, rate_hw, rate_ops, gate_hw, gate_ops, + flags); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bcbaf6c95d52..456c3ced1ac9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -638,6 +638,13 @@ struct clk *clk_register_composite(struct device *dev, const char *name, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +struct clk_hw *clk_hw_register_composite(struct device *dev, const char *name, + const char * const *parent_names, int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags); +void clk_hw_unregister_composite(struct clk_hw *hw); /*** * struct clk_gpio_gate - gpio gated clock -- cgit v1.2.3 From b120743a64a3ec68b8c5310a6009094329b4a33b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:27:55 -0800 Subject: clk: gpio: Add hw based registration APIs Add registration APIs in the clk gpio code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-gpio.c | 52 ++++++++++++++++++++++++++++++++++---------- include/linux/clk-provider.h | 8 +++++++ 2 files changed, 49 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c index 08f65acc5d57..86b245746a6b 100644 --- a/drivers/clk/clk-gpio.c +++ b/drivers/clk/clk-gpio.c @@ -94,13 +94,13 @@ const struct clk_ops clk_gpio_mux_ops = { }; EXPORT_SYMBOL_GPL(clk_gpio_mux_ops); -static struct clk *clk_register_gpio(struct device *dev, const char *name, +static struct clk_hw *clk_register_gpio(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags, const struct clk_ops *clk_gpio_ops) { struct clk_gpio *clk_gpio; - struct clk *clk; + struct clk_hw *hw; struct clk_init_data init = {}; unsigned long gpio_flags; int err; @@ -141,24 +141,26 @@ static struct clk *clk_register_gpio(struct device *dev, const char *name, clk_gpio->gpiod = gpio_to_desc(gpio); clk_gpio->hw.init = &init; + hw = &clk_gpio->hw; if (dev) - clk = devm_clk_register(dev, &clk_gpio->hw); + err = devm_clk_hw_register(dev, hw); else - clk = clk_register(NULL, &clk_gpio->hw); + err = clk_hw_register(NULL, hw); - if (!IS_ERR(clk)) - return clk; + if (!err) + return hw; if (!dev) { gpiod_put(clk_gpio->gpiod); kfree(clk_gpio); } - return clk; + return ERR_PTR(err); } /** - * clk_register_gpio_gate - register a gpio clock gate with the clock framework + * clk_hw_register_gpio_gate - register a gpio clock gate with the clock + * framework * @dev: device that is registering this clock * @name: name of this clock * @parent_name: name of this clock's parent @@ -166,7 +168,7 @@ static struct clk *clk_register_gpio(struct device *dev, const char *name, * @active_low: true if gpio should be set to 0 to enable clock * @flags: clock flags */ -struct clk *clk_register_gpio_gate(struct device *dev, const char *name, +struct clk_hw *clk_hw_register_gpio_gate(struct device *dev, const char *name, const char *parent_name, unsigned gpio, bool active_low, unsigned long flags) { @@ -175,10 +177,24 @@ struct clk *clk_register_gpio_gate(struct device *dev, const char *name, (parent_name ? 1 : 0), gpio, active_low, flags, &clk_gpio_gate_ops); } +EXPORT_SYMBOL_GPL(clk_hw_register_gpio_gate); + +struct clk *clk_register_gpio_gate(struct device *dev, const char *name, + const char *parent_name, unsigned gpio, bool active_low, + unsigned long flags) +{ + struct clk_hw *hw; + + hw = clk_hw_register_gpio_gate(dev, name, parent_name, gpio, active_low, + flags); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; +} EXPORT_SYMBOL_GPL(clk_register_gpio_gate); /** - * clk_register_gpio_mux - register a gpio clock mux with the clock framework + * clk_hw_register_gpio_mux - register a gpio clock mux with the clock framework * @dev: device that is registering this clock * @name: name of this clock * @parent_names: names of this clock's parents @@ -187,7 +203,7 @@ EXPORT_SYMBOL_GPL(clk_register_gpio_gate); * @active_low: true if gpio should be set to 0 to enable clock * @flags: clock flags */ -struct clk *clk_register_gpio_mux(struct device *dev, const char *name, +struct clk_hw *clk_hw_register_gpio_mux(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags) { @@ -199,6 +215,20 @@ struct clk *clk_register_gpio_mux(struct device *dev, const char *name, return clk_register_gpio(dev, name, parent_names, num_parents, gpio, active_low, flags, &clk_gpio_mux_ops); } +EXPORT_SYMBOL_GPL(clk_hw_register_gpio_mux); + +struct clk *clk_register_gpio_mux(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, unsigned gpio, + bool active_low, unsigned long flags) +{ + struct clk_hw *hw; + + hw = clk_hw_register_gpio_mux(dev, name, parent_names, num_parents, + gpio, active_low, flags); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; +} EXPORT_SYMBOL_GPL(clk_register_gpio_mux); static int gpio_clk_driver_probe(struct platform_device *pdev) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 456c3ced1ac9..6c36c5e8ccbe 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -667,6 +667,10 @@ extern const struct clk_ops clk_gpio_gate_ops; struct clk *clk_register_gpio_gate(struct device *dev, const char *name, const char *parent_name, unsigned gpio, bool active_low, unsigned long flags); +struct clk_hw *clk_hw_register_gpio_gate(struct device *dev, const char *name, + const char *parent_name, unsigned gpio, bool active_low, + unsigned long flags); +void clk_hw_unregister_gpio_gate(struct clk_hw *hw); /** * struct clk_gpio_mux - gpio controlled clock multiplexer @@ -682,6 +686,10 @@ extern const struct clk_ops clk_gpio_mux_ops; struct clk *clk_register_gpio_mux(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags); +struct clk_hw *clk_hw_register_gpio_mux(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, unsigned gpio, + bool active_low, unsigned long flags); +void clk_hw_unregister_gpio_mux(struct clk_hw *hw); /** * clk_register - allocate a new clock, register it and return an opaque cookie -- cgit v1.2.3 From 26ef56be9e0944a9b136169eb47140f309ce745b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:34:13 -0800 Subject: clk: fixed-rate: Add hw based registration APIs Add registration APIs in the clk fixed-rate code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- drivers/clk/clk-fixed-rate.c | 44 ++++++++++++++++++++++++++++++++++++-------- include/linux/clk-provider.h | 7 +++++++ 2 files changed, 43 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index cd9dc925b3f8..8e4453eb54e8 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -45,8 +45,8 @@ const struct clk_ops clk_fixed_rate_ops = { EXPORT_SYMBOL_GPL(clk_fixed_rate_ops); /** - * clk_register_fixed_rate_with_accuracy - register fixed-rate clock with the - * clock framework + * clk_hw_register_fixed_rate_with_accuracy - register fixed-rate clock with + * the clock framework * @dev: device that is registering this clock * @name: name of this clock * @parent_name: name of clock's parent @@ -54,13 +54,14 @@ EXPORT_SYMBOL_GPL(clk_fixed_rate_ops); * @fixed_rate: non-adjustable clock rate * @fixed_accuracy: non-adjustable clock rate */ -struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev, +struct clk_hw *clk_hw_register_fixed_rate_with_accuracy(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate, unsigned long fixed_accuracy) { struct clk_fixed_rate *fixed; - struct clk *clk; + struct clk_hw *hw; struct clk_init_data init; + int ret; /* allocate fixed-rate clock */ fixed = kzalloc(sizeof(*fixed), GFP_KERNEL); @@ -79,22 +80,49 @@ struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev, fixed->hw.init = &init; /* register the clock */ - clk = clk_register(dev, &fixed->hw); - if (IS_ERR(clk)) + hw = &fixed->hw; + ret = clk_hw_register(dev, hw); + if (ret) { kfree(fixed); + hw = ERR_PTR(ret); + } - return clk; + return hw; +} +EXPORT_SYMBOL_GPL(clk_hw_register_fixed_rate_with_accuracy); + +struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + unsigned long fixed_rate, unsigned long fixed_accuracy) +{ + struct clk_hw *hw; + + hw = clk_hw_register_fixed_rate_with_accuracy(dev, name, parent_name, + flags, fixed_rate, fixed_accuracy); + if (IS_ERR(hw)) + return ERR_CAST(hw); + return hw->clk; } EXPORT_SYMBOL_GPL(clk_register_fixed_rate_with_accuracy); /** - * clk_register_fixed_rate - register fixed-rate clock with the clock framework + * clk_hw_register_fixed_rate - register fixed-rate clock with the clock + * framework * @dev: device that is registering this clock * @name: name of this clock * @parent_name: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate */ +struct clk_hw *clk_hw_register_fixed_rate(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + unsigned long fixed_rate) +{ + return clk_hw_register_fixed_rate_with_accuracy(dev, name, parent_name, + flags, fixed_rate, 0); +} +EXPORT_SYMBOL_GPL(clk_hw_register_fixed_rate); + struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 6c36c5e8ccbe..c3fc042d517c 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -282,10 +282,17 @@ extern const struct clk_ops clk_fixed_rate_ops; struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate); +struct clk_hw *clk_hw_register_fixed_rate(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + unsigned long fixed_rate); struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate, unsigned long fixed_accuracy); void clk_unregister_fixed_rate(struct clk *clk); +struct clk_hw *clk_hw_register_fixed_rate_with_accuracy(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + unsigned long fixed_rate, unsigned long fixed_accuracy); + void of_fixed_clk_setup(struct device_node *np); /** -- cgit v1.2.3 From 607ea7cda6315be0ad8be2f98bc9de6f2d656ae6 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 18 Apr 2016 14:41:10 +0300 Subject: net/ipv6/addrconf: simplify sysctl registration Struct ctl_table_header holds pointer to sysctl table which could be used for freeing it after unregistration. IPv4 sysctls already use that. Remove redundant NULL assignment: ndev allocated using kzalloc. This also saves some bytes: sysctl table could be shorter than DEVCONF_MAX+1 if some options are disable in config. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- net/ipv6/addrconf.c | 43 +++++++++++++++++-------------------------- 2 files changed, 19 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7edc14fb66b6..58d6e158755f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -63,7 +63,8 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; - void *sysctl; + + struct ctl_table_header *sysctl_header; }; struct ipv6_params { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a6c99275bd8c..5a42c0fe0449 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -359,7 +359,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; ndev->cnf.mtu6 = dev->mtu; - ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -5620,13 +5619,7 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } -static struct addrconf_sysctl_table -{ - struct ctl_table_header *sysctl_header; - struct ctl_table addrconf_vars[DEVCONF_MAX+1]; -} addrconf_sysctl __read_mostly = { - .sysctl_header = NULL, - .addrconf_vars = { +static const struct ctl_table addrconf_sysctl[] = { { .procname = "forwarding", .data = &ipv6_devconf.forwarding, @@ -5944,52 +5937,50 @@ static struct addrconf_sysctl_table { /* sentinel */ } - }, }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct addrconf_sysctl_table *t; + struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; - t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (!t) + table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL); + if (!table) goto out; - for (i = 0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; - t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ - t->addrconf_vars[i].extra2 = net; + for (i = 0; table[i].data; i++) { + table[i].data += (char *)p - (char *)&ipv6_devconf; + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); - t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (!t->sysctl_header) + p->sysctl_header = register_net_sysctl(net, path, table); + if (!p->sysctl_header) goto free; - p->sysctl = t; return 0; free: - kfree(t); + kfree(table); out: return -ENOBUFS; } static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { - struct addrconf_sysctl_table *t; + struct ctl_table *table; - if (!p->sysctl) + if (!p->sysctl_header) return; - t = p->sysctl; - p->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + table = p->sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(p->sysctl_header); + p->sysctl_header = NULL; + kfree(table); } static int addrconf_sysctl_register(struct inet6_dev *idev) -- cgit v1.2.3 From bd570ff970a54df653b48ed0cfb373f2ebed083d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 18 Apr 2016 21:01:24 +0200 Subject: bpf: add event output helper for notifications/sampling/logging This patch adds a new helper for cls/act programs that can push events to user space applications. For networking, this can be f.e. for sampling, debugging, logging purposes or pushing of arbitrary wake-up events. The idea is similar to a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") and 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example"). The eBPF program utilizes a perf event array map that user space populates with fds from perf_event_open(), the eBPF program calls into the helper f.e. as skb_event_output(skb, &my_map, BPF_F_CURRENT_CPU, raw, sizeof(raw)) so that the raw data is pushed into the fd f.e. at the map index of the current CPU. User space can poll/mmap/etc on this and has a data channel for receiving events that can be post-processed. The nice thing is that since the eBPF program and user space application making use of it are tightly coupled, they can define their own arbitrary raw data format and what/when they want to push. While f.e. packet headers could be one part of the meta data that is being pushed, this is not a substitute for things like packet sockets as whole packet is not being pushed and push is only done in a single direction. Intention is more of a generically usable, efficient event pipe to applications. Workflow is that tc can pin the map and applications can attach themselves e.g. after cls/act setup to one or multiple map slots, demuxing is done by the eBPF program. Adding this facility is with minimal effort, it reuses the helper introduced in a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") and we get its functionality for free by overloading its BPF_FUNC_ identifier for cls/act programs, ctx is currently unused, but will be made use of in future. Example will be added to iproute2's BPF example files. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ kernel/bpf/core.c | 7 +++++++ kernel/trace/bpf_trace.c | 27 +++++++++++++++++++++++++++ net/core/filter.c | 2 ++ 4 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5fb3c610fa96..f63afdc43bec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -169,7 +169,9 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +const struct bpf_func_proto *bpf_get_event_output_proto(void); #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index be0abf669ced..e4248fe79513 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -764,14 +764,21 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; + const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; + const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { return NULL; } +const struct bpf_func_proto * __weak bpf_get_event_output_proto(void) +{ + return NULL; +} + /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { .func = NULL, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b3cc24cb4321..780bcbe1d4de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -277,6 +277,33 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg5_type = ARG_CONST_STACK_SIZE, }; +static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); + +static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); + + perf_fetch_caller_regs(regs); + + return bpf_perf_event_output((long)regs, r2, flags, r4, size); +} + +static const struct bpf_func_proto bpf_event_output_proto = { + .func = bpf_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +const struct bpf_func_proto *bpf_get_event_output_proto(void) +{ + return &bpf_event_output_proto; +} + static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { diff --git a/net/core/filter.c b/net/core/filter.c index 5d2ac2b9d1c4..218e5de8c402 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2039,6 +2039,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_redirect_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; + case BPF_FUNC_perf_event_output: + return bpf_get_event_output_proto(); default: return sk_filter_func_proto(func_id); } -- cgit v1.2.3 From b853cb9628bfbcc4017da46d5f5b46e3eba9d8c6 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 28 Mar 2016 21:35:22 -0700 Subject: soc: qcom: smd: Make callback pass channel reference By passing the smd channel reference to the callback, rather than the smd device, we can open additional smd channels from sub-devices of smd devices. Also updates the two smd clients today found in mainline. Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/smd-rpm.c | 9 ++++++--- drivers/soc/qcom/smd.c | 22 ++++++++++++++++++---- drivers/soc/qcom/wcnss_ctrl.c | 8 ++++---- include/linux/soc/qcom/smd.h | 7 +++++-- 4 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/smd-rpm.c b/drivers/soc/qcom/smd-rpm.c index 731fa066f712..6609d7e0edb0 100644 --- a/drivers/soc/qcom/smd-rpm.c +++ b/drivers/soc/qcom/smd-rpm.c @@ -33,6 +33,7 @@ */ struct qcom_smd_rpm { struct qcom_smd_channel *rpm_channel; + struct device *dev; struct completion ack; struct mutex lock; @@ -149,14 +150,14 @@ out: } EXPORT_SYMBOL(qcom_rpm_smd_write); -static int qcom_smd_rpm_callback(struct qcom_smd_device *qsdev, +static int qcom_smd_rpm_callback(struct qcom_smd_channel *channel, const void *data, size_t count) { const struct qcom_rpm_header *hdr = data; size_t hdr_length = le32_to_cpu(hdr->length); const struct qcom_rpm_message *msg; - struct qcom_smd_rpm *rpm = dev_get_drvdata(&qsdev->dev); + struct qcom_smd_rpm *rpm = qcom_smd_get_drvdata(channel); const u8 *buf = data + sizeof(struct qcom_rpm_header); const u8 *end = buf + hdr_length; char msgbuf[32]; @@ -165,7 +166,7 @@ static int qcom_smd_rpm_callback(struct qcom_smd_device *qsdev, if (le32_to_cpu(hdr->service_type) != RPM_SERVICE_TYPE_REQUEST || hdr_length < sizeof(struct qcom_rpm_message)) { - dev_err(&qsdev->dev, "invalid request\n"); + dev_err(rpm->dev, "invalid request\n"); return 0; } @@ -206,7 +207,9 @@ static int qcom_smd_rpm_probe(struct qcom_smd_device *sdev) mutex_init(&rpm->lock); init_completion(&rpm->ack); + rpm->dev = &sdev->dev; rpm->rpm_channel = sdev->channel; + qcom_smd_set_drvdata(sdev->channel, rpm); dev_set_drvdata(&sdev->dev, rpm); diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c index b6434c4be86a..ac1957dfdf24 100644 --- a/drivers/soc/qcom/smd.c +++ b/drivers/soc/qcom/smd.c @@ -194,6 +194,8 @@ struct qcom_smd_channel { int pkt_size; + void *drvdata; + struct list_head list; struct list_head dev_list; }; @@ -513,7 +515,6 @@ static void qcom_smd_channel_advance(struct qcom_smd_channel *channel, */ static int qcom_smd_channel_recv_single(struct qcom_smd_channel *channel) { - struct qcom_smd_device *qsdev = channel->qsdev; unsigned tail; size_t len; void *ptr; @@ -533,7 +534,7 @@ static int qcom_smd_channel_recv_single(struct qcom_smd_channel *channel) len = channel->pkt_size; } - ret = channel->cb(qsdev, ptr, len); + ret = channel->cb(channel, ptr, len); if (ret < 0) return ret; @@ -1034,6 +1035,18 @@ int qcom_smd_driver_register(struct qcom_smd_driver *qsdrv) } EXPORT_SYMBOL(qcom_smd_driver_register); +void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel) +{ + return channel->drvdata; +} +EXPORT_SYMBOL(qcom_smd_get_drvdata); + +void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data) +{ + channel->drvdata = data; +} +EXPORT_SYMBOL(qcom_smd_set_drvdata); + /** * qcom_smd_driver_unregister - unregister a smd driver * @qsdrv: qcom_smd_driver struct @@ -1079,12 +1092,13 @@ qcom_smd_find_channel(struct qcom_smd_edge *edge, const char *name) * Returns a channel handle on success, or -EPROBE_DEFER if the channel isn't * ready. */ -struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_device *sdev, +struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *parent, const char *name, qcom_smd_cb_t cb) { struct qcom_smd_channel *channel; - struct qcom_smd_edge *edge = sdev->channel->edge; + struct qcom_smd_device *sdev = parent->qsdev; + struct qcom_smd_edge *edge = parent->edge; int ret; /* Wait up to HZ for the channel to appear */ diff --git a/drivers/soc/qcom/wcnss_ctrl.c b/drivers/soc/qcom/wcnss_ctrl.c index 7a986f881d5c..c544f3d2c6ee 100644 --- a/drivers/soc/qcom/wcnss_ctrl.c +++ b/drivers/soc/qcom/wcnss_ctrl.c @@ -100,17 +100,17 @@ struct wcnss_download_nv_resp { /** * wcnss_ctrl_smd_callback() - handler from SMD responses - * @qsdev: smd device handle + * @channel: smd channel handle * @data: pointer to the incoming data packet * @count: size of the incoming data packet * * Handles any incoming packets from the remote WCNSS_CTRL service. */ -static int wcnss_ctrl_smd_callback(struct qcom_smd_device *qsdev, +static int wcnss_ctrl_smd_callback(struct qcom_smd_channel *channel, const void *data, size_t count) { - struct wcnss_ctrl *wcnss = dev_get_drvdata(&qsdev->dev); + struct wcnss_ctrl *wcnss = qcom_smd_get_drvdata(channel); const struct wcnss_download_nv_resp *nvresp; const struct wcnss_version_resp *version; const struct wcnss_msg_hdr *hdr = data; @@ -246,7 +246,7 @@ static int wcnss_ctrl_probe(struct qcom_smd_device *sdev) init_completion(&wcnss->ack); INIT_WORK(&wcnss->download_nv_work, wcnss_download_nv); - dev_set_drvdata(&sdev->dev, wcnss); + qcom_smd_set_drvdata(sdev->channel, wcnss); return wcnss_request_version(wcnss); } diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index bd51c8a9d807..cb2f81559bc0 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -26,7 +26,7 @@ struct qcom_smd_device { struct qcom_smd_channel *channel; }; -typedef int (*qcom_smd_cb_t)(struct qcom_smd_device *, const void *, size_t); +typedef int (*qcom_smd_cb_t)(struct qcom_smd_channel *, const void *, size_t); /** * struct qcom_smd_driver - smd driver struct @@ -50,13 +50,16 @@ struct qcom_smd_driver { int qcom_smd_driver_register(struct qcom_smd_driver *drv); void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); +void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel); +void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data); + #define module_qcom_smd_driver(__smd_driver) \ module_driver(__smd_driver, qcom_smd_driver_register, \ qcom_smd_driver_unregister) int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); -struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_device *sdev, +struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *channel, const char *name, qcom_smd_cb_t cb); -- cgit v1.2.3 From 12524e348bcb2189a8cf43829e90256f7e7d4f3d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Apr 2016 11:18:06 +0200 Subject: clk: renesas: mstp: Provide dummy attach/detach_dev callbacks Provide dummy cpg_mstp_{at,de}tach_dev() PM Domain callbacks if CPG/MSTP support is not included, so the rcar-sysc driver won't have to care about this. Signed-off-by: Geert Uytterhoeven Acked-by: Laurent Pinchart --- include/linux/clk/renesas.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 095b1681daf4..ab57a298a374 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -25,7 +25,12 @@ void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); void cpg_mstp_add_clk_domain(struct device_node *np); +#ifdef CONFIG_CLK_RENESAS_CPG_MSTP int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev); void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev); +#else +#define cpg_mstp_attach_dev NULL +#define cpg_mstp_detach_dev NULL +#endif #endif -- cgit v1.2.3 From 2066390ad47b374f3d35075a32325b47d15bf735 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Mar 2016 17:03:46 +0100 Subject: clk: renesas: cpg-mssr: Export cpg_mssr_{at,de}tach_dev() The R-Car SYSC PM Domain driver has to power manage devices in power areas using clocks. To reuse code and to share knowledge of clocks suitable for power management, this is ideally done through the existing cpg_mssr_attach_dev() and cpg_mssr_detach_dev() callbacks. Hence these callbacks can no longer rely on their "domain" parameter pointing to the CPG/MSSR Clock Domain. To handle this, keep a pointer to the clock domain in a static variable. cpg_mssr_attach_dev() has to support probe deferral, as the R-Car SYSC PM Domain may be initialized, and devices may be added to it, before the CPG/MSSR Clock Domain is initialized. Dummy callbacks are provided for the case where CPG/MSTP support is not included, so the rcar-sysc driver won't have to care about this. Signed-off-by: Geert Uytterhoeven Acked-by: Laurent Pinchart --- drivers/clk/renesas/renesas-cpg-mssr.c | 18 ++++++++++++------ include/linux/clk/renesas.h | 7 +++++++ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index 703bdb157528..1f2dc3629f0e 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -388,6 +389,8 @@ struct cpg_mssr_clk_domain { unsigned int core_pm_clks[0]; }; +static struct cpg_mssr_clk_domain *cpg_mssr_clk_domain; + static bool cpg_mssr_is_pm_clk(const struct of_phandle_args *clkspec, struct cpg_mssr_clk_domain *pd) { @@ -411,17 +414,20 @@ static bool cpg_mssr_is_pm_clk(const struct of_phandle_args *clkspec, } } -static int cpg_mssr_attach_dev(struct generic_pm_domain *genpd, - struct device *dev) +int cpg_mssr_attach_dev(struct generic_pm_domain *unused, struct device *dev) { - struct cpg_mssr_clk_domain *pd = - container_of(genpd, struct cpg_mssr_clk_domain, genpd); + struct cpg_mssr_clk_domain *pd = cpg_mssr_clk_domain; struct device_node *np = dev->of_node; struct of_phandle_args clkspec; struct clk *clk; int i = 0; int error; + if (!pd) { + dev_dbg(dev, "CPG/MSSR clock domain not yet available\n"); + return -EPROBE_DEFER; + } + while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i, &clkspec)) { if (cpg_mssr_is_pm_clk(&clkspec, pd)) @@ -461,8 +467,7 @@ fail_put: return error; } -static void cpg_mssr_detach_dev(struct generic_pm_domain *genpd, - struct device *dev) +void cpg_mssr_detach_dev(struct generic_pm_domain *unused, struct device *dev) { if (!list_empty(&dev->power.subsys_data->clock_list)) pm_clk_destroy(dev); @@ -491,6 +496,7 @@ static int __init cpg_mssr_add_clk_domain(struct device *dev, pm_genpd_init(genpd, &simple_qos_governor, false); genpd->attach_dev = cpg_mssr_attach_dev; genpd->detach_dev = cpg_mssr_detach_dev; + cpg_mssr_clk_domain = pd; of_genpd_add_provider_simple(np, genpd); return 0; diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index ab57a298a374..ba6fa4148515 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -33,4 +33,11 @@ void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev); #define cpg_mstp_detach_dev NULL #endif +#ifdef CONFIG_CLK_RENESAS_CPG_MSSR +int cpg_mssr_attach_dev(struct generic_pm_domain *unused, struct device *dev); +void cpg_mssr_detach_dev(struct generic_pm_domain *unused, struct device *dev); +#else +#define cpg_mssr_attach_dev NULL +#define cpg_mssr_detach_dev NULL +#endif #endif -- cgit v1.2.3 From 58ea8abf490415c390e0cc671e875510c9b66318 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Mon, 18 Apr 2016 09:21:44 -0500 Subject: crypto: ccp - Register the CCP as a DMA resource The CCP has the ability to provide DMA services to the kernel using pass-through mode of the device. Register these services as general purpose DMA channels. Changes since v2: - Add a Signed-off-by Changes since v1: - Allocate memory for a string in ccp_dmaengine_register - Ensure register/unregister calls are properly ordered - Verified all changed files are listed in the diffstat - Undo some superfluous changes - Added a cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/Kconfig | 1 + drivers/crypto/ccp/Makefile | 6 +- drivers/crypto/ccp/ccp-dev-v3.c | 11 + drivers/crypto/ccp/ccp-dev.h | 47 +++ drivers/crypto/ccp/ccp-dmaengine.c | 727 +++++++++++++++++++++++++++++++++++++ drivers/crypto/ccp/ccp-ops.c | 69 +++- include/linux/ccp.h | 36 +- 7 files changed, 893 insertions(+), 4 deletions(-) create mode 100644 drivers/crypto/ccp/ccp-dmaengine.c (limited to 'include/linux') diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig index 6e37845abf8f..79cabfba2a2a 100644 --- a/drivers/crypto/ccp/Kconfig +++ b/drivers/crypto/ccp/Kconfig @@ -3,6 +3,7 @@ config CRYPTO_DEV_CCP_DD depends on CRYPTO_DEV_CCP default m select HW_RANDOM + select DMA_ENGINE select CRYPTO_SHA1 select CRYPTO_SHA256 help diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index b750592cc936..ee4d2741b3ab 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile @@ -1,5 +1,9 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o -ccp-objs := ccp-dev.o ccp-ops.o ccp-dev-v3.o ccp-platform.o +ccp-objs := ccp-dev.o \ + ccp-ops.o \ + ccp-dev-v3.o \ + ccp-platform.o \ + ccp-dmaengine.o ccp-$(CONFIG_PCI) += ccp-pci.o obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 7d5eab49179e..597fc50bdaa6 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -406,6 +406,11 @@ static int ccp_init(struct ccp_device *ccp) goto e_kthread; } + /* Register the DMA engine support */ + ret = ccp_dmaengine_register(ccp); + if (ret) + goto e_hwrng; + ccp_add_device(ccp); /* Enable interrupts */ @@ -413,6 +418,9 @@ static int ccp_init(struct ccp_device *ccp) return 0; +e_hwrng: + hwrng_unregister(&ccp->hwrng); + e_kthread: for (i = 0; i < ccp->cmd_q_count; i++) if (ccp->cmd_q[i].kthread) @@ -436,6 +444,9 @@ static void ccp_destroy(struct ccp_device *ccp) /* Remove this device from the list of available units first */ ccp_del_device(ccp); + /* Unregister the DMA engine */ + ccp_dmaengine_unregister(ccp); + /* Unregister the RNG */ hwrng_unregister(&ccp->hwrng); diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 7745d0be491d..5d986c9d72eb 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #define MAX_CCP_NAME_LEN 16 #define MAX_DMAPOOL_NAME_LEN 32 @@ -167,6 +170,39 @@ extern struct ccp_vdata ccpv3; struct ccp_device; struct ccp_cmd; +struct ccp_dma_cmd { + struct list_head entry; + + struct ccp_cmd ccp_cmd; +}; + +struct ccp_dma_desc { + struct list_head entry; + + struct ccp_device *ccp; + + struct list_head pending; + struct list_head active; + + enum dma_status status; + struct dma_async_tx_descriptor tx_desc; + size_t len; +}; + +struct ccp_dma_chan { + struct ccp_device *ccp; + + spinlock_t lock; + struct list_head pending; + struct list_head active; + struct list_head complete; + + struct tasklet_struct cleanup_tasklet; + + enum dma_status status; + struct dma_chan dma_chan; +}; + struct ccp_cmd_queue { struct ccp_device *ccp; @@ -260,6 +296,14 @@ struct ccp_device { struct hwrng hwrng; unsigned int hwrng_retries; + /* + * Support for the CCP DMA capabilities + */ + struct dma_device dma_dev; + struct ccp_dma_chan *ccp_dma_chan; + struct kmem_cache *dma_cmd_cache; + struct kmem_cache *dma_desc_cache; + /* * A counter used to generate job-ids for cmds submitted to the CCP */ @@ -418,4 +462,7 @@ int ccp_cmd_queue_thread(void *data); int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); +int ccp_dmaengine_register(struct ccp_device *ccp); +void ccp_dmaengine_unregister(struct ccp_device *ccp); + #endif diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c new file mode 100644 index 000000000000..94f77b0f9ae7 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -0,0 +1,727 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include "ccp-dev.h" +#include "../../dma/dmaengine.h" + +#define CCP_DMA_WIDTH(_mask) \ +({ \ + u64 mask = _mask + 1; \ + (mask == 0) ? 64 : fls64(mask); \ +}) + +static void ccp_free_cmd_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_cmd *cmd, *ctmp; + + list_for_each_entry_safe(cmd, ctmp, list, entry) { + list_del(&cmd->entry); + kmem_cache_free(ccp->dma_cmd_cache, cmd); + } +} + +static void ccp_free_desc_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_desc *desc, *dtmp; + + list_for_each_entry_safe(desc, dtmp, list, entry) { + ccp_free_cmd_resources(ccp, &desc->active); + ccp_free_cmd_resources(ccp, &desc->pending); + + list_del(&desc->entry); + kmem_cache_free(ccp->dma_desc_cache, desc); + } +} + +static void ccp_free_chan_resources(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s - chan=%p\n", __func__, chan); + + spin_lock_irqsave(&chan->lock, flags); + + ccp_free_desc_resources(chan->ccp, &chan->complete); + ccp_free_desc_resources(chan->ccp, &chan->active); + ccp_free_desc_resources(chan->ccp, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void ccp_cleanup_desc_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_desc *desc, *dtmp; + + list_for_each_entry_safe_reverse(desc, dtmp, list, entry) { + if (!async_tx_test_ack(&desc->tx_desc)) + continue; + + dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc); + + ccp_free_cmd_resources(ccp, &desc->active); + ccp_free_cmd_resources(ccp, &desc->pending); + + list_del(&desc->entry); + kmem_cache_free(ccp->dma_desc_cache, desc); + } +} + +static void ccp_do_cleanup(unsigned long data) +{ + struct ccp_dma_chan *chan = (struct ccp_dma_chan *)data; + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s - chan=%s\n", __func__, + dma_chan_name(&chan->dma_chan)); + + spin_lock_irqsave(&chan->lock, flags); + + ccp_cleanup_desc_resources(chan->ccp, &chan->complete); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static int ccp_issue_next_cmd(struct ccp_dma_desc *desc) +{ + struct ccp_dma_cmd *cmd; + int ret; + + cmd = list_first_entry(&desc->pending, struct ccp_dma_cmd, entry); + list_move(&cmd->entry, &desc->active); + + dev_dbg(desc->ccp->dev, "%s - tx %d, cmd=%p\n", __func__, + desc->tx_desc.cookie, cmd); + + ret = ccp_enqueue_cmd(&cmd->ccp_cmd); + if (!ret || (ret == -EINPROGRESS) || (ret == -EBUSY)) + return 0; + + dev_dbg(desc->ccp->dev, "%s - error: ret=%d, tx %d, cmd=%p\n", __func__, + ret, desc->tx_desc.cookie, cmd); + + return ret; +} + +static void ccp_free_active_cmd(struct ccp_dma_desc *desc) +{ + struct ccp_dma_cmd *cmd; + + cmd = list_first_entry_or_null(&desc->active, struct ccp_dma_cmd, + entry); + if (!cmd) + return; + + dev_dbg(desc->ccp->dev, "%s - freeing tx %d cmd=%p\n", + __func__, desc->tx_desc.cookie, cmd); + + list_del(&cmd->entry); + kmem_cache_free(desc->ccp->dma_cmd_cache, cmd); +} + +static struct ccp_dma_desc *__ccp_next_dma_desc(struct ccp_dma_chan *chan, + struct ccp_dma_desc *desc) +{ + /* Move current DMA descriptor to the complete list */ + if (desc) + list_move(&desc->entry, &chan->complete); + + /* Get the next DMA descriptor on the active list */ + desc = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, + entry); + + return desc; +} + +static struct ccp_dma_desc *ccp_handle_active_desc(struct ccp_dma_chan *chan, + struct ccp_dma_desc *desc) +{ + struct dma_async_tx_descriptor *tx_desc; + unsigned long flags; + + /* Loop over descriptors until one is found with commands */ + do { + if (desc) { + /* Remove the DMA command from the list and free it */ + ccp_free_active_cmd(desc); + + if (!list_empty(&desc->pending)) { + /* No errors, keep going */ + if (desc->status != DMA_ERROR) + return desc; + + /* Error, free remaining commands and move on */ + ccp_free_cmd_resources(desc->ccp, + &desc->pending); + } + + tx_desc = &desc->tx_desc; + } else { + tx_desc = NULL; + } + + spin_lock_irqsave(&chan->lock, flags); + + if (desc) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; + + dev_dbg(desc->ccp->dev, + "%s - tx %d complete, status=%u\n", __func__, + desc->tx_desc.cookie, desc->status); + + dma_cookie_complete(tx_desc); + } + + desc = __ccp_next_dma_desc(chan, desc); + + spin_unlock_irqrestore(&chan->lock, flags); + + if (tx_desc) { + if (tx_desc->callback && + (tx_desc->flags & DMA_PREP_INTERRUPT)) + tx_desc->callback(tx_desc->callback_param); + + dma_run_dependencies(tx_desc); + } + } while (desc); + + return NULL; +} + +static struct ccp_dma_desc *__ccp_pending_to_active(struct ccp_dma_chan *chan) +{ + struct ccp_dma_desc *desc; + + if (list_empty(&chan->pending)) + return NULL; + + desc = list_empty(&chan->active) + ? list_first_entry(&chan->pending, struct ccp_dma_desc, entry) + : NULL; + + list_splice_tail_init(&chan->pending, &chan->active); + + return desc; +} + +static void ccp_cmd_callback(void *data, int err) +{ + struct ccp_dma_desc *desc = data; + struct ccp_dma_chan *chan; + int ret; + + if (err == -EINPROGRESS) + return; + + chan = container_of(desc->tx_desc.chan, struct ccp_dma_chan, + dma_chan); + + dev_dbg(chan->ccp->dev, "%s - tx %d callback, err=%d\n", + __func__, desc->tx_desc.cookie, err); + + if (err) + desc->status = DMA_ERROR; + + while (true) { + /* Check for DMA descriptor completion */ + desc = ccp_handle_active_desc(chan, desc); + + /* Don't submit cmd if no descriptor or DMA is paused */ + if (!desc || (chan->status == DMA_PAUSED)) + break; + + ret = ccp_issue_next_cmd(desc); + if (!ret) + break; + + desc->status = DMA_ERROR; + } + + tasklet_schedule(&chan->cleanup_tasklet); +} + +static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc) +{ + struct ccp_dma_desc *desc = container_of(tx_desc, struct ccp_dma_desc, + tx_desc); + struct ccp_dma_chan *chan; + dma_cookie_t cookie; + unsigned long flags; + + chan = container_of(tx_desc->chan, struct ccp_dma_chan, dma_chan); + + spin_lock_irqsave(&chan->lock, flags); + + cookie = dma_cookie_assign(tx_desc); + list_add_tail(&desc->entry, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); + + dev_dbg(chan->ccp->dev, "%s - added tx descriptor %d to pending list\n", + __func__, cookie); + + return cookie; +} + +static struct ccp_dma_cmd *ccp_alloc_dma_cmd(struct ccp_dma_chan *chan) +{ + struct ccp_dma_cmd *cmd; + + cmd = kmem_cache_alloc(chan->ccp->dma_cmd_cache, GFP_NOWAIT); + if (cmd) + memset(cmd, 0, sizeof(*cmd)); + + return cmd; +} + +static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan, + unsigned long flags) +{ + struct ccp_dma_desc *desc; + + desc = kmem_cache_alloc(chan->ccp->dma_desc_cache, GFP_NOWAIT); + if (!desc) + return NULL; + + memset(desc, 0, sizeof(*desc)); + + dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan); + desc->tx_desc.flags = flags; + desc->tx_desc.tx_submit = ccp_tx_submit; + desc->ccp = chan->ccp; + INIT_LIST_HEAD(&desc->pending); + INIT_LIST_HEAD(&desc->active); + desc->status = DMA_IN_PROGRESS; + + return desc; +} + +static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, + struct scatterlist *dst_sg, + unsigned int dst_nents, + struct scatterlist *src_sg, + unsigned int src_nents, + unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_device *ccp = chan->ccp; + struct ccp_dma_desc *desc; + struct ccp_dma_cmd *cmd; + struct ccp_cmd *ccp_cmd; + struct ccp_passthru_nomap_engine *ccp_pt; + unsigned int src_offset, src_len; + unsigned int dst_offset, dst_len; + unsigned int len; + unsigned long sflags; + size_t total_len; + + if (!dst_sg || !src_sg) + return NULL; + + if (!dst_nents || !src_nents) + return NULL; + + desc = ccp_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + total_len = 0; + + src_len = sg_dma_len(src_sg); + src_offset = 0; + + dst_len = sg_dma_len(dst_sg); + dst_offset = 0; + + while (true) { + if (!src_len) { + src_nents--; + if (!src_nents) + break; + + src_sg = sg_next(src_sg); + if (!src_sg) + break; + + src_len = sg_dma_len(src_sg); + src_offset = 0; + continue; + } + + if (!dst_len) { + dst_nents--; + if (!dst_nents) + break; + + dst_sg = sg_next(dst_sg); + if (!dst_sg) + break; + + dst_len = sg_dma_len(dst_sg); + dst_offset = 0; + continue; + } + + len = min(dst_len, src_len); + + cmd = ccp_alloc_dma_cmd(chan); + if (!cmd) + goto err; + + ccp_cmd = &cmd->ccp_cmd; + ccp_pt = &ccp_cmd->u.passthru_nomap; + ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; + ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; + ccp_cmd->engine = CCP_ENGINE_PASSTHRU; + ccp_pt->bit_mod = CCP_PASSTHRU_BITWISE_NOOP; + ccp_pt->byte_swap = CCP_PASSTHRU_BYTESWAP_NOOP; + ccp_pt->src_dma = sg_dma_address(src_sg) + src_offset; + ccp_pt->dst_dma = sg_dma_address(dst_sg) + dst_offset; + ccp_pt->src_len = len; + ccp_pt->final = 1; + ccp_cmd->callback = ccp_cmd_callback; + ccp_cmd->data = desc; + + list_add_tail(&cmd->entry, &desc->pending); + + dev_dbg(ccp->dev, + "%s - cmd=%p, src=%pad, dst=%pad, len=%llu\n", __func__, + cmd, &ccp_pt->src_dma, + &ccp_pt->dst_dma, ccp_pt->src_len); + + total_len += len; + + src_len -= len; + src_offset += len; + + dst_len -= len; + dst_offset += len; + } + + desc->len = total_len; + + if (list_empty(&desc->pending)) + goto err; + + dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc); + + spin_lock_irqsave(&chan->lock, sflags); + + list_add_tail(&desc->entry, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, sflags); + + return desc; + +err: + ccp_free_cmd_resources(ccp, &desc->pending); + kmem_cache_free(ccp->dma_desc_cache, desc); + + return NULL; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_memcpy( + struct dma_chan *dma_chan, dma_addr_t dst, dma_addr_t src, size_t len, + unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + struct scatterlist dst_sg, src_sg; + + dev_dbg(chan->ccp->dev, + "%s - src=%pad, dst=%pad, len=%zu, flags=%#lx\n", + __func__, &src, &dst, len, flags); + + sg_init_table(&dst_sg, 1); + sg_dma_address(&dst_sg) = dst; + sg_dma_len(&dst_sg) = len; + + sg_init_table(&src_sg, 1); + sg_dma_address(&src_sg) = src; + sg_dma_len(&src_sg) = len; + + desc = ccp_create_desc(dma_chan, &dst_sg, 1, &src_sg, 1, flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_sg( + struct dma_chan *dma_chan, struct scatterlist *dst_sg, + unsigned int dst_nents, struct scatterlist *src_sg, + unsigned int src_nents, unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + + dev_dbg(chan->ccp->dev, + "%s - src=%p, src_nents=%u dst=%p, dst_nents=%u, flags=%#lx\n", + __func__, src_sg, src_nents, dst_sg, dst_nents, flags); + + desc = ccp_create_desc(dma_chan, dst_sg, dst_nents, src_sg, src_nents, + flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_interrupt( + struct dma_chan *dma_chan, unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + + desc = ccp_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static void ccp_issue_pending(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s\n", __func__); + + spin_lock_irqsave(&chan->lock, flags); + + desc = __ccp_pending_to_active(chan); + + spin_unlock_irqrestore(&chan->lock, flags); + + /* If there was nothing active, start processing */ + if (desc) + ccp_cmd_callback(desc, 0); +} + +static enum dma_status ccp_tx_status(struct dma_chan *dma_chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + enum dma_status ret; + unsigned long flags; + + if (chan->status == DMA_PAUSED) { + ret = DMA_PAUSED; + goto out; + } + + ret = dma_cookie_status(dma_chan, cookie, state); + if (ret == DMA_COMPLETE) { + spin_lock_irqsave(&chan->lock, flags); + + /* Get status from complete chain, if still there */ + list_for_each_entry(desc, &chan->complete, entry) { + if (desc->tx_desc.cookie != cookie) + continue; + + ret = desc->status; + break; + } + + spin_unlock_irqrestore(&chan->lock, flags); + } + +out: + dev_dbg(chan->ccp->dev, "%s - %u\n", __func__, ret); + + return ret; +} + +static int ccp_pause(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + + chan->status = DMA_PAUSED; + + /*TODO: Wait for active DMA to complete before returning? */ + + return 0; +} + +static int ccp_resume(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + desc = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, + entry); + + spin_unlock_irqrestore(&chan->lock, flags); + + /* Indicate the channel is running again */ + chan->status = DMA_IN_PROGRESS; + + /* If there was something active, re-start */ + if (desc) + ccp_cmd_callback(desc, 0); + + return 0; +} + +static int ccp_terminate_all(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s\n", __func__); + + /*TODO: Wait for active DMA to complete before continuing */ + + spin_lock_irqsave(&chan->lock, flags); + + /*TODO: Purge the complete list? */ + ccp_free_desc_resources(chan->ccp, &chan->active); + ccp_free_desc_resources(chan->ccp, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +int ccp_dmaengine_register(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_device *dma_dev = &ccp->dma_dev; + struct dma_chan *dma_chan; + char *dma_cmd_cache_name; + char *dma_desc_cache_name; + unsigned int i; + int ret; + + ccp->ccp_dma_chan = devm_kcalloc(ccp->dev, ccp->cmd_q_count, + sizeof(*(ccp->ccp_dma_chan)), + GFP_KERNEL); + if (!ccp->ccp_dma_chan) + return -ENOMEM; + + dma_cmd_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, + "%s-dmaengine-cmd-cache", + ccp->name); + if (!dma_cmd_cache_name) + return -ENOMEM; + + ccp->dma_cmd_cache = kmem_cache_create(dma_cmd_cache_name, + sizeof(struct ccp_dma_cmd), + sizeof(void *), + SLAB_HWCACHE_ALIGN, NULL); + if (!ccp->dma_cmd_cache) + return -ENOMEM; + + dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, + "%s-dmaengine-desc-cache", + ccp->name); + if (!dma_cmd_cache_name) + return -ENOMEM; + ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name, + sizeof(struct ccp_dma_desc), + sizeof(void *), + SLAB_HWCACHE_ALIGN, NULL); + if (!ccp->dma_desc_cache) { + ret = -ENOMEM; + goto err_cache; + } + + dma_dev->dev = ccp->dev; + dma_dev->src_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev)); + dma_dev->dst_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev)); + dma_dev->directions = DMA_MEM_TO_MEM; + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SG, dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + + chan->ccp = ccp; + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->pending); + INIT_LIST_HEAD(&chan->active); + INIT_LIST_HEAD(&chan->complete); + + tasklet_init(&chan->cleanup_tasklet, ccp_do_cleanup, + (unsigned long)chan); + + dma_chan->device = dma_dev; + dma_cookie_init(dma_chan); + + list_add_tail(&dma_chan->device_node, &dma_dev->channels); + } + + dma_dev->device_free_chan_resources = ccp_free_chan_resources; + dma_dev->device_prep_dma_memcpy = ccp_prep_dma_memcpy; + dma_dev->device_prep_dma_sg = ccp_prep_dma_sg; + dma_dev->device_prep_dma_interrupt = ccp_prep_dma_interrupt; + dma_dev->device_issue_pending = ccp_issue_pending; + dma_dev->device_tx_status = ccp_tx_status; + dma_dev->device_pause = ccp_pause; + dma_dev->device_resume = ccp_resume; + dma_dev->device_terminate_all = ccp_terminate_all; + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_reg; + + return 0; + +err_reg: + kmem_cache_destroy(ccp->dma_desc_cache); + +err_cache: + kmem_cache_destroy(ccp->dma_cmd_cache); + + return ret; +} + +void ccp_dmaengine_unregister(struct ccp_device *ccp) +{ + struct dma_device *dma_dev = &ccp->dma_dev; + + dma_async_device_unregister(dma_dev); + + kmem_cache_destroy(ccp->dma_desc_cache); + kmem_cache_destroy(ccp->dma_cmd_cache); +} diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index eefdf595f758..ffa2891035ac 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -1427,6 +1427,70 @@ e_mask: return ret; } +static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, + struct ccp_cmd *cmd) +{ + struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap; + struct ccp_dm_workarea mask; + struct ccp_op op; + int ret; + + if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) + return -EINVAL; + + if (!pt->src_dma || !pt->dst_dma) + return -EINVAL; + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) + return -EINVAL; + if (!pt->mask) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + /* Load the mask */ + op.ksb_key = cmd_q->ksb_key; + + mask.length = pt->mask_len; + mask.dma.address = pt->mask; + mask.dma.length = pt->mask_len; + + ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + return ret; + } + } + + /* Send data to the CCP Passthru engine */ + op.eom = 1; + op.soc = 1; + + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = pt->src_dma; + op.src.u.dma.offset = 0; + op.src.u.dma.length = pt->src_len; + + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = pt->dst_dma; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = pt->src_len; + + ret = cmd_q->ccp->vdata->perform->perform_passthru(&op); + if (ret) + cmd->engine_error = cmd_q->cmd_error; + + return ret; +} + static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_ecc_engine *ecc = &cmd->u.ecc; @@ -1762,7 +1826,10 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) ret = ccp_run_rsa_cmd(cmd_q, cmd); break; case CCP_ENGINE_PASSTHRU: - ret = ccp_run_passthru_cmd(cmd_q, cmd); + if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP) + ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd); + else + ret = ccp_run_passthru_cmd(cmd_q, cmd); break; case CCP_ENGINE_ECC: ret = ccp_run_ecc_cmd(cmd_q, cmd); diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 915af3095b39..7c2bb27c067c 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -1,9 +1,10 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky + * Author: Gary R Hook * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -381,6 +382,35 @@ struct ccp_passthru_engine { u32 final; }; +/** + * struct ccp_passthru_nomap_engine - CCP pass-through operation + * without performing DMA mapping + * @bit_mod: bitwise operation to perform + * @byte_swap: byteswap operation to perform + * @mask: mask to be applied to data + * @mask_len: length in bytes of mask + * @src: data to be used for this operation + * @dst: data produced by this operation + * @src_len: length in bytes of data used for this operation + * @final: indicate final pass-through operation + * + * Variables required to be set when calling ccp_enqueue_cmd(): + * - bit_mod, byte_swap, src, dst, src_len + * - mask, mask_len if bit_mod is not CCP_PASSTHRU_BITWISE_NOOP + */ +struct ccp_passthru_nomap_engine { + enum ccp_passthru_bitwise bit_mod; + enum ccp_passthru_byteswap byte_swap; + + dma_addr_t mask; + u32 mask_len; /* In bytes */ + + dma_addr_t src_dma, dst_dma; + u64 src_len; /* In bytes */ + + u32 final; +}; + /***** ECC engine *****/ #define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ #define CCP_ECC_MAX_OPERANDS 6 @@ -522,7 +552,8 @@ enum ccp_engine { }; /* Flag values for flags member of ccp_cmd */ -#define CCP_CMD_MAY_BACKLOG 0x00000001 +#define CCP_CMD_MAY_BACKLOG 0x00000001 +#define CCP_CMD_PASSTHRU_NO_DMA_MAP 0x00000002 /** * struct ccp_cmd - CPP operation request @@ -562,6 +593,7 @@ struct ccp_cmd { struct ccp_sha_engine sha; struct ccp_rsa_engine rsa; struct ccp_passthru_engine passthru; + struct ccp_passthru_nomap_engine passthru_nomap; struct ccp_ecc_engine ecc; } u; -- cgit v1.2.3 From 2e4682ba2ed79d8082b78d292b3b80f54d970b7a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Mar 2016 16:30:22 +0100 Subject: KVM: add missing memory barrier in kvm_{make,check}_request kvm_make_request and kvm_check_request imply a producer-consumer relationship; add implicit memory barriers to them. There was indeed already a place that was adding an explicit smp_mb() to order between kvm_check_request and the processing of the request. That memory barrier can be removed (as an added benefit, kvm_check_request can use smp_mb__after_atomic which is free on x86). Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 3 --- include/linux/kvm_host.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 54ead79e444b..dfb4c6476877 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -382,9 +382,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u32 i, nr_ioapic_pins; int idx; - /* kvm->irq_routing must be read after clearing - * KVM_SCAN_IOAPIC. */ - smp_mb(); idx = srcu_read_lock(&kvm->irq_srcu); table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); nr_ioapic_pins = min_t(u32, table->nr_rt_entries, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5276fe0916fc..ad40d44784c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1091,6 +1091,11 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { + /* + * Ensure the rest of the request is published to kvm_check_request's + * caller. Paired with the smp_mb__after_atomic in kvm_check_request. + */ + smp_wmb(); set_bit(req, &vcpu->requests); } @@ -1098,6 +1103,12 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { if (test_bit(req, &vcpu->requests)) { clear_bit(req, &vcpu->requests); + + /* + * Ensure the rest of the request is visible to kvm_check_request's + * caller. Paired with the smp_wmb in kvm_make_request. + */ + smp_mb__after_atomic(); return true; } else { return false; -- cgit v1.2.3 From de4a54c4dfaed0604565c1b27488dce56997acc0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Jan 2016 20:19:41 +0000 Subject: regulator: core: Use a bitfield for continuous_voltage_range Using a bitfield enables the compiler to lay out the structure more efficiently when we have other boolean flags since multiple values can be included in a single byte. Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index cd271e89a7e6..9ac3f9879576 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -292,7 +292,7 @@ struct regulator_desc { const struct regulator_desc *, struct regulator_config *); int id; - bool continuous_voltage_range; + unsigned int continuous_voltage_range:1; unsigned n_voltages; const struct regulator_ops *ops; int irq; -- cgit v1.2.3 From 9257b4a206fc0229dd5f84b78e4d1ebf3f91d270 Mon Sep 17 00:00:00 2001 From: Omer Peleg Date: Wed, 20 Apr 2016 11:34:11 +0300 Subject: iommu/iova: introduce per-cpu caching to iova allocation IOVA allocation has two problems that impede high-throughput I/O. First, it can do a linear search over the allocated IOVA ranges. Second, the rbtree spinlock that serializes IOVA allocations becomes contended. Address these problems by creating an API for caching allocated IOVA ranges, so that the IOVA allocator isn't accessed frequently. This patch adds a per-CPU cache, from which CPUs can alloc/free IOVAs without taking the rbtree spinlock. The per-CPU caches are backed by a global cache, to avoid invoking the (linear-time) IOVA allocator without needing to make the per-CPU cache size excessive. This design is based on magazines, as described in "Magazines and Vmem: Extending the Slab Allocator to Many CPUs and Arbitrary Resources" (currently available at https://www.usenix.org/legacy/event/usenix01/bonwick.html) Adding caching on top of the existing rbtree allocator maintains the property that IOVAs are densely packed in the IO virtual address space, which is important for keeping IOMMU page table usage low. To keep the cache size reasonable, we bound the IOVA space a CPU can cache by 32 MiB (we cache a bounded number of IOVA ranges, and only ranges of size <= 128 KiB). The shared global cache is bounded at 4 MiB of IOVA space. Signed-off-by: Omer Peleg [mad@cs.technion.ac.il: rebased, cleaned up and reworded the commit message] Signed-off-by: Adam Morrison Reviewed-by: Shaohua Li Reviewed-by: Ben Serebrin [dwmw2: split out VT-d part into a separate patch] Signed-off-by: David Woodhouse --- drivers/iommu/iova.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/iova.h | 23 ++- 2 files changed, 414 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index fa0adef32bd6..ba764a0835d3 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -20,6 +20,17 @@ #include #include #include +#include +#include + +static bool iova_rcache_insert(struct iova_domain *iovad, + unsigned long pfn, + unsigned long size); +static unsigned long iova_rcache_get(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn); +static void init_iova_rcaches(struct iova_domain *iovad); +static void free_iova_rcaches(struct iova_domain *iovad); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -38,6 +49,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; + init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -291,33 +303,18 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, } EXPORT_SYMBOL_GPL(alloc_iova); -/** - * find_iova - find's an iova for a given pfn - * @iovad: - iova domain in question. - * @pfn: - page frame number - * This function finds and returns an iova belonging to the - * given doamin which matches the given pfn. - */ -struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) +static struct iova * +private_find_iova(struct iova_domain *iovad, unsigned long pfn) { - unsigned long flags; - struct rb_node *node; + struct rb_node *node = iovad->rbroot.rb_node; + + assert_spin_locked(&iovad->iova_rbtree_lock); - /* Take the lock so that no other thread is manipulating the rbtree */ - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = iovad->rbroot.rb_node; while (node) { struct iova *iova = container_of(node, struct iova, node); /* If pfn falls within iova's range, return iova */ if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - /* We are not holding the lock while this iova - * is referenced by the caller as the same thread - * which called this function also calls __free_iova() - * and it is by design that only one thread can possibly - * reference a particular iova and hence no conflict. - */ return iova; } @@ -327,9 +324,35 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) node = node->rb_right; } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return NULL; } + +static void private_free_iova(struct iova_domain *iovad, struct iova *iova) +{ + assert_spin_locked(&iovad->iova_rbtree_lock); + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + free_iova_mem(iova); +} + +/** + * find_iova - finds an iova for a given pfn + * @iovad: - iova domain in question. + * @pfn: - page frame number + * This function finds and returns an iova belonging to the + * given doamin which matches the given pfn. + */ +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) +{ + unsigned long flags; + struct iova *iova; + + /* Take the lock so that no other thread is manipulating the rbtree */ + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + iova = private_find_iova(iovad, pfn); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return iova; +} EXPORT_SYMBOL_GPL(find_iova); /** @@ -344,10 +367,8 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - __cached_rbnode_delete_update(iovad, iova); - rb_erase(&iova->node, &iovad->rbroot); + private_free_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - free_iova_mem(iova); } EXPORT_SYMBOL_GPL(__free_iova); @@ -369,6 +390,63 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) } EXPORT_SYMBOL_GPL(free_iova); +/** + * alloc_iova_fast - allocates an iova from rcache + * @iovad: - iova domain in question + * @size: - size of page frames to allocate + * @limit_pfn: - max limit address + * This function tries to satisfy an iova allocation from the rcache, + * and falls back to regular allocation on failure. +*/ +unsigned long +alloc_iova_fast(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn) +{ + bool flushed_rcache = false; + unsigned long iova_pfn; + struct iova *new_iova; + + iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + if (iova_pfn) + return iova_pfn; + +retry: + new_iova = alloc_iova(iovad, size, limit_pfn, true); + if (!new_iova) { + unsigned int cpu; + + if (flushed_rcache) + return 0; + + /* Try replenishing IOVAs by flushing rcache. */ + flushed_rcache = true; + for_each_online_cpu(cpu) + free_cpu_cached_iovas(cpu, iovad); + goto retry; + } + + return new_iova->pfn_lo; +} +EXPORT_SYMBOL_GPL(alloc_iova_fast); + +/** + * free_iova_fast - free iova pfn range into rcache + * @iovad: - iova domain in question. + * @pfn: - pfn that is allocated previously + * @size: - # of pages in range + * This functions frees an iova range by trying to put it into the rcache, + * falling back to regular iova deallocation via free_iova() if this fails. + */ +void +free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) +{ + if (iova_rcache_insert(iovad, pfn, size)) + return; + + free_iova(iovad, pfn); +} +EXPORT_SYMBOL_GPL(free_iova_fast); + /** * put_iova_domain - destroys the iova doamin * @iovad: - iova domain in question. @@ -379,6 +457,7 @@ void put_iova_domain(struct iova_domain *iovad) struct rb_node *node; unsigned long flags; + free_iova_rcaches(iovad); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); node = rb_first(&iovad->rbroot); while (node) { @@ -550,5 +629,295 @@ error: return NULL; } +/* + * Magazine caches for IOVA ranges. For an introduction to magazines, + * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab + * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams. + * For simplicity, we use a static magazine size and don't implement the + * dynamic size tuning described in the paper. + */ + +#define IOVA_MAG_SIZE 128 + +struct iova_magazine { + unsigned long size; + unsigned long pfns[IOVA_MAG_SIZE]; +}; + +struct iova_cpu_rcache { + spinlock_t lock; + struct iova_magazine *loaded; + struct iova_magazine *prev; +}; + +static struct iova_magazine *iova_magazine_alloc(gfp_t flags) +{ + return kzalloc(sizeof(struct iova_magazine), flags); +} + +static void iova_magazine_free(struct iova_magazine *mag) +{ + kfree(mag); +} + +static void +iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) +{ + unsigned long flags; + int i; + + if (!mag) + return; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + + for (i = 0 ; i < mag->size; ++i) { + struct iova *iova = private_find_iova(iovad, mag->pfns[i]); + + BUG_ON(!iova); + private_free_iova(iovad, iova); + } + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + + mag->size = 0; +} + +static bool iova_magazine_full(struct iova_magazine *mag) +{ + return (mag && mag->size == IOVA_MAG_SIZE); +} + +static bool iova_magazine_empty(struct iova_magazine *mag) +{ + return (!mag || mag->size == 0); +} + +static unsigned long iova_magazine_pop(struct iova_magazine *mag, + unsigned long limit_pfn) +{ + BUG_ON(iova_magazine_empty(mag)); + + if (mag->pfns[mag->size - 1] >= limit_pfn) + return 0; + + return mag->pfns[--mag->size]; +} + +static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) +{ + BUG_ON(iova_magazine_full(mag)); + + mag->pfns[mag->size++] = pfn; +} + +static void init_iova_rcaches(struct iova_domain *iovad) +{ + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + unsigned int cpu; + int i; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + spin_lock_init(&rcache->lock); + rcache->depot_size = 0; + rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); + if (WARN_ON(!rcache->cpu_rcaches)) + continue; + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_init(&cpu_rcache->lock); + cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); + cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); + } + } +} + +/* + * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and + * return true on success. Can fail if rcache is full and we can't free + * space, and free_iova() (our only caller) will then return the IOVA + * range to the rbtree instead. + */ +static bool __iova_rcache_insert(struct iova_domain *iovad, + struct iova_rcache *rcache, + unsigned long iova_pfn) +{ + struct iova_magazine *mag_to_free = NULL; + struct iova_cpu_rcache *cpu_rcache; + bool can_insert = false; + unsigned long flags; + + cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + spin_lock_irqsave(&cpu_rcache->lock, flags); + + if (!iova_magazine_full(cpu_rcache->loaded)) { + can_insert = true; + } else if (!iova_magazine_full(cpu_rcache->prev)) { + swap(cpu_rcache->prev, cpu_rcache->loaded); + can_insert = true; + } else { + struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC); + + if (new_mag) { + spin_lock(&rcache->lock); + if (rcache->depot_size < MAX_GLOBAL_MAGS) { + rcache->depot[rcache->depot_size++] = + cpu_rcache->loaded; + } else { + mag_to_free = cpu_rcache->loaded; + } + spin_unlock(&rcache->lock); + + cpu_rcache->loaded = new_mag; + can_insert = true; + } + } + + if (can_insert) + iova_magazine_push(cpu_rcache->loaded, iova_pfn); + + spin_unlock_irqrestore(&cpu_rcache->lock, flags); + + if (mag_to_free) { + iova_magazine_free_pfns(mag_to_free, iovad); + iova_magazine_free(mag_to_free); + } + + return can_insert; +} + +static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, + unsigned long size) +{ + unsigned int log_size = order_base_2(size); + + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) + return false; + + return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn); +} + +/* + * Caller wants to allocate a new IOVA range from 'rcache'. If we can + * satisfy the request, return a matching non-NULL range and remove + * it from the 'rcache'. + */ +static unsigned long __iova_rcache_get(struct iova_rcache *rcache, + unsigned long limit_pfn) +{ + struct iova_cpu_rcache *cpu_rcache; + unsigned long iova_pfn = 0; + bool has_pfn = false; + unsigned long flags; + + cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + spin_lock_irqsave(&cpu_rcache->lock, flags); + + if (!iova_magazine_empty(cpu_rcache->loaded)) { + has_pfn = true; + } else if (!iova_magazine_empty(cpu_rcache->prev)) { + swap(cpu_rcache->prev, cpu_rcache->loaded); + has_pfn = true; + } else { + spin_lock(&rcache->lock); + if (rcache->depot_size > 0) { + iova_magazine_free(cpu_rcache->loaded); + cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + has_pfn = true; + } + spin_unlock(&rcache->lock); + } + + if (has_pfn) + iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); + + spin_unlock_irqrestore(&cpu_rcache->lock, flags); + + return iova_pfn; +} + +/* + * Try to satisfy IOVA allocation range from rcache. Fail if requested + * size is too big or the DMA limit we are given isn't satisfied by the + * top element in the magazine. + */ +static unsigned long iova_rcache_get(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn) +{ + unsigned int log_size = order_base_2(size); + + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) + return 0; + + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); +} + +/* + * Free a cpu's rcache. + */ +static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, + struct iova_rcache *rcache) +{ + struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + unsigned long flags; + + spin_lock_irqsave(&cpu_rcache->lock, flags); + + iova_magazine_free_pfns(cpu_rcache->loaded, iovad); + iova_magazine_free(cpu_rcache->loaded); + + iova_magazine_free_pfns(cpu_rcache->prev, iovad); + iova_magazine_free(cpu_rcache->prev); + + spin_unlock_irqrestore(&cpu_rcache->lock, flags); +} + +/* + * free rcache data structures. + */ +static void free_iova_rcaches(struct iova_domain *iovad) +{ + struct iova_rcache *rcache; + unsigned long flags; + unsigned int cpu; + int i, j; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + for_each_possible_cpu(cpu) + free_cpu_iova_rcache(cpu, iovad, rcache); + spin_lock_irqsave(&rcache->lock, flags); + free_percpu(rcache->cpu_rcaches); + for (j = 0; j < rcache->depot_size; ++j) { + iova_magazine_free_pfns(rcache->depot[j], iovad); + iova_magazine_free(rcache->depot[j]); + } + spin_unlock_irqrestore(&rcache->lock, flags); + } +} + +/* + * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) + */ +void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) +{ + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + unsigned long flags; + int i; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_irqsave(&cpu_rcache->lock, flags); + iova_magazine_free_pfns(cpu_rcache->loaded, iovad); + iova_magazine_free_pfns(cpu_rcache->prev, iovad); + spin_unlock_irqrestore(&cpu_rcache->lock, flags); + } +} + MODULE_AUTHOR("Anil S Keshavamurthy "); MODULE_LICENSE("GPL"); diff --git a/include/linux/iova.h b/include/linux/iova.h index 92f7177db2ce..f27bb2c62fca 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -19,8 +19,21 @@ /* iova structure */ struct iova { struct rb_node node; - unsigned long pfn_hi; /* IOMMU dish out addr hi */ - unsigned long pfn_lo; /* IOMMU dish out addr lo */ + unsigned long pfn_hi; /* Highest allocated pfn */ + unsigned long pfn_lo; /* Lowest allocated pfn */ +}; + +struct iova_magazine; +struct iova_cpu_rcache; + +#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ +#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ + +struct iova_rcache { + spinlock_t lock; + unsigned long depot_size; + struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_cpu_rcache __percpu *cpu_rcaches; }; /* holds all the iova translations for a domain */ @@ -31,6 +44,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ }; static inline unsigned long iova_size(struct iova *iova) @@ -78,6 +92,10 @@ void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned); +void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, + unsigned long size); +unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); @@ -87,5 +105,6 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); +void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #endif -- cgit v1.2.3 From 80e0f8d94d3090f0f7bf3faf3e6180e920ee0d22 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 24 Feb 2016 14:12:59 +0530 Subject: pinctrl: Add devm_ apis for pinctrl_{register, unregister} Add device managed APIs devm_pinctrl_register() and devm_pinctrl_unregister() for the APIs pinctrl_register() and pinctrl_unregister(). This helps in reducing code in error path and sometimes removal of .remove callback for driver unbind. Signed-off-by: Laxman Dewangan Reviewed-by: Philipp Zabel Acked-by: Bjorn Andersson Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 63 +++++++++++++++++++++++++++++++++++++++++ include/linux/pinctrl/pinctrl.h | 6 ++++ 2 files changed, 69 insertions(+) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index f67a8b7a4e18..21df52e8192a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1872,6 +1872,69 @@ void pinctrl_unregister(struct pinctrl_dev *pctldev) } EXPORT_SYMBOL_GPL(pinctrl_unregister); +static void devm_pinctrl_dev_release(struct device *dev, void *res) +{ + struct pinctrl_dev *pctldev = *(struct pinctrl_dev **)res; + + pinctrl_unregister(pctldev); +} + +static int devm_pinctrl_dev_match(struct device *dev, void *res, void *data) +{ + struct pctldev **r = res; + + if (WARN_ON(!r || !*r) + return 0; + + return *r == data; +} + +/** + * devm_pinctrl_register() - Resource managed version of pinctrl_register(). + * @dev: parent device for this pin controller + * @pctldesc: descriptor for this pin controller + * @driver_data: private pin controller data for this pin controller + * + * Returns an error pointer if pincontrol register failed. Otherwise + * it returns valid pinctrl handle. + * + * The pinctrl device will be automatically released when the device is unbound. + */ +struct pinctrl_dev *devm_pinctrl_register(struct device *dev, + struct pinctrl_desc *pctldesc, + void *driver_data) +{ + struct pinctrl_dev **ptr, *pctldev; + + ptr = devres_alloc(devm_pinctrl_dev_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + pctldev = pinctrl_register(pctldesc, dev, driver_data); + if (IS_ERR(pctldev)) { + devres_free(ptr); + return pctldev; + } + + *ptr = pctldev; + devres_add(dev, ptr); + + return pctldev; +} +EXPORT_SYMBOL_GPL(devm_pinctrl_register); + +/** + * devm_pinctrl_unregister() - Resource managed version of pinctrl_unregister(). + * @dev: device for which which resource was allocated + * @pctldev: the pinctrl device to unregister. + */ +void devm_pinctrl_unregister(struct device *dev, struct pinctrl_dev *pctldev) +{ + WARN_ON(devres_release(dev, devm_pinctrl_dev_release, + devm_pinctrl_dev_match, pctldev)); +} +EXPORT_SYMBOL_GPL(devm_pinctrl_unregister); + static int __init pinctrl_init(void) { pr_info("initialized pinctrl subsystem\n"); diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 9ba59fcba549..a42e57da270d 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -144,6 +144,12 @@ struct pinctrl_desc { extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); extern void pinctrl_unregister(struct pinctrl_dev *pctldev); +extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, + struct pinctrl_desc *pctldesc, + void *driver_data); +extern void devm_pinctrl_unregister(struct device *dev, + struct pinctrl_dev *pctldev); + extern bool pin_is_valid(struct pinctrl_dev *pctldev, int pin); extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); -- cgit v1.2.3 From b53f27e4fa0d0e72d897830cc4f3f83d2a25d952 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:23 -0700 Subject: string_helpers: add kstrdup_quotable Handle allocating and escaping a string safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/string_helpers.h | 2 ++ lib/string_helpers.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index dabe643eb5fa..9de228af00c1 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -68,4 +68,6 @@ static inline int string_escape_str_any_np(const char *src, char *dst, return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); } +char *kstrdup_quotable(const char *src, gfp_t gfp); + #endif diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 5c88204b6f1f..aa00c9f989ee 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -534,3 +535,30 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, return p - dst; } EXPORT_SYMBOL(string_escape_mem); + +/* + * Return an allocated string that has been escaped of special characters + * and double quotes, making it safe to log in quotes. + */ +char *kstrdup_quotable(const char *src, gfp_t gfp) +{ + size_t slen, dlen; + char *dst; + const int flags = ESCAPE_HEX; + const char esc[] = "\f\n\r\t\v\a\e\\\""; + + if (!src) + return NULL; + slen = strlen(src); + + dlen = string_escape_mem(src, slen, NULL, 0, flags, esc); + dst = kmalloc(dlen + 1, gfp); + if (!dst) + return NULL; + + WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen); + dst[dlen] = '\0'; + + return dst; +} +EXPORT_SYMBOL_GPL(kstrdup_quotable); -- cgit v1.2.3 From 0d0443288f2244d7054796086e481ddef6abdbba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:24 -0700 Subject: string_helpers: add kstrdup_quotable_cmdline Provide an escaped (but readable: no inter-argument NULLs) commandline safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/string_helpers.h | 1 + lib/string_helpers.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 9de228af00c1..684d2695fc36 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -69,5 +69,6 @@ static inline int string_escape_str_any_np(const char *src, char *dst, } char *kstrdup_quotable(const char *src, gfp_t gfp); +char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); #endif diff --git a/lib/string_helpers.c b/lib/string_helpers.c index aa00c9f989ee..b16ee85aaf87 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -562,3 +563,36 @@ char *kstrdup_quotable(const char *src, gfp_t gfp) return dst; } EXPORT_SYMBOL_GPL(kstrdup_quotable); + +/* + * Returns allocated NULL-terminated string containing process + * command line, with inter-argument NULLs replaced with spaces, + * and other special characters escaped. + */ +char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp) +{ + char *buffer, *quoted; + int i, res; + + buffer = kmalloc(PAGE_SIZE, GFP_TEMPORARY); + if (!buffer) + return NULL; + + res = get_cmdline(task, buffer, PAGE_SIZE - 1); + buffer[res] = '\0'; + + /* Collapse trailing NULLs, leave res pointing to last non-NULL. */ + while (--res >= 0 && buffer[res] == '\0') + ; + + /* Replace inter-argument NULLs. */ + for (i = 0; i <= res; i++) + if (buffer[i] == '\0') + buffer[i] = ' '; + + /* Make sure result is printable. */ + quoted = kstrdup_quotable(buffer, gfp); + kfree(buffer); + return quoted; +} +EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline); -- cgit v1.2.3 From 21985319add60b55fc27230d9421a3e5af7e998a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:25 -0700 Subject: string_helpers: add kstrdup_quotable_file Allocate a NULL-terminated file path with special characters escaped, safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/string_helpers.h | 3 +++ lib/string_helpers.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 684d2695fc36..5ce9538f290e 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -3,6 +3,8 @@ #include +struct file; + /* Descriptions of the types of units to * print in */ enum string_size_units { @@ -70,5 +72,6 @@ static inline int string_escape_str_any_np(const char *src, char *dst, char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); +char *kstrdup_quotable_file(struct file *file, gfp_t gfp); #endif diff --git a/lib/string_helpers.c b/lib/string_helpers.c index b16ee85aaf87..ecaac2c0526f 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -596,3 +598,31 @@ char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp) return quoted; } EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline); + +/* + * Returns allocated NULL-terminated string containing pathname, + * with special characters escaped, able to be safely logged. If + * there is an error, the leading character will be "<". + */ +char *kstrdup_quotable_file(struct file *file, gfp_t gfp) +{ + char *temp, *pathname; + + if (!file) + return kstrdup("", gfp); + + /* We add 11 spaces for ' (deleted)' to be appended */ + temp = kmalloc(PATH_MAX + 11, GFP_TEMPORARY); + if (!temp) + return kstrdup("", gfp); + + pathname = file_path(file, temp, PATH_MAX + 11); + if (IS_ERR(pathname)) + pathname = kstrdup("", gfp); + else + pathname = kstrdup_quotable(pathname, gfp); + + kfree(temp); + return pathname; +} +EXPORT_SYMBOL_GPL(kstrdup_quotable_file); -- cgit v1.2.3 From 1284ab5b2dcb927d38e4f3fbc2e307f3d1af9262 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 20 Apr 2016 15:46:27 -0700 Subject: fs: define a string representation of the kernel_read_file_id enumeration A string representation of the kernel_read_file_id enumeration is needed for displaying messages (eg. pr_info, auditing) that can be used by multiple LSMs and the integrity subsystem. To simplify keeping the list of strings up to date with the enumeration, this patch defines two new preprocessing macros named __fid_enumify and __fid_stringify to create the enumeration and an array of strings. kernel_read_file_id_str() returns a string based on the enumeration. Signed-off-by: Mimi Zohar [kees: removed removal of my old version, constified pointer values] Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/fs.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..90477550b935 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2580,15 +2580,34 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); +#define __kernel_read_file_id(id) \ + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ + id(POLICY, security-policy) \ + id(MAX_ID, ) + +#define __fid_enumify(ENUM, dummy) READING_ ## ENUM, +#define __fid_stringify(dummy, str) #str, + enum kernel_read_file_id { - READING_FIRMWARE = 1, - READING_MODULE, - READING_KEXEC_IMAGE, - READING_KEXEC_INITRAMFS, - READING_POLICY, - READING_MAX_ID + __kernel_read_file_id(__fid_enumify) +}; + +static const char * const kernel_read_file_str[] = { + __kernel_read_file_id(__fid_stringify) }; +static inline const char * const kernel_read_file_id_str(enum kernel_read_file_id id) +{ + if (id < 0 || id >= READING_MAX_ID) + return kernel_read_file_str[READING_UNKNOWN]; + + return kernel_read_file_str[id]; +} + extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); -- cgit v1.2.3 From 9b091556a073a9f5f93e2ad23d118f45c4796a84 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:28 -0700 Subject: LSM: LoadPin for kernel file loading restrictions This LSM enforces that kernel-loaded files (modules, firmware, etc) must all come from the same filesystem, with the expectation that such a filesystem is backed by a read-only device such as dm-verity or CDROM. This allows systems that have a verified and/or unchangeable filesystem to enforce module and firmware loading restrictions without needing to sign the files individually. Signed-off-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: James Morris --- Documentation/security/LoadPin.txt | 17 ++++ MAINTAINERS | 6 ++ include/linux/lsm_hooks.h | 5 + security/Kconfig | 1 + security/Makefile | 2 + security/loadpin/Kconfig | 10 ++ security/loadpin/Makefile | 1 + security/loadpin/loadpin.c | 190 +++++++++++++++++++++++++++++++++++++ security/security.c | 1 + 9 files changed, 233 insertions(+) create mode 100644 Documentation/security/LoadPin.txt create mode 100644 security/loadpin/Kconfig create mode 100644 security/loadpin/Makefile create mode 100644 security/loadpin/loadpin.c (limited to 'include/linux') diff --git a/Documentation/security/LoadPin.txt b/Documentation/security/LoadPin.txt new file mode 100644 index 000000000000..e11877f5d3d4 --- /dev/null +++ b/Documentation/security/LoadPin.txt @@ -0,0 +1,17 @@ +LoadPin is a Linux Security Module that ensures all kernel-loaded files +(modules, firmware, etc) all originate from the same filesystem, with +the expectation that such a filesystem is backed by a read-only device +such as dm-verity or CDROM. This allows systems that have a verified +and/or unchangeable filesystem to enforce module and firmware loading +restrictions without needing to sign the files individually. + +The LSM is selectable at build-time with CONFIG_SECURITY_LOADPIN, and +can be controlled at boot-time with the kernel command line option +"loadpin.enabled". By default, it is enabled, but can be disabled at +boot ("loadpin.enabled=0"). + +LoadPin starts pinning when it sees the first file loaded. If the +block device backing the filesystem is not read-only, a sysctl is +created to toggle pinning: /proc/sys/kernel/loadpin/enabled. (Having +a mutable filesystem means pinning is mutable too, but having the +sysctl allows for easy testing on systems with a mutable filesystem.) diff --git a/MAINTAINERS b/MAINTAINERS index 1c32f8a3d6c4..b4b1e8179018 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9962,6 +9962,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git S: Supported F: security/apparmor/ +LOADPIN SECURITY MODULE +M: Kees Cook +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git lsm/loadpin +S: Supported +F: security/loadpin/ + YAMA SECURITY MODULE M: Kees Cook T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ae2537886177..6e466fc0666c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1892,5 +1892,10 @@ extern void __init yama_add_hooks(void); #else static inline void __init yama_add_hooks(void) { } #endif +#ifdef CONFIG_SECURITY_LOADPIN +void __init loadpin_add_hooks(void); +#else +static inline void loadpin_add_hooks(void) { }; +#endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index e45237897b43..176758cdfa57 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -122,6 +122,7 @@ source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig source security/apparmor/Kconfig +source security/loadpin/Kconfig source security/yama/Kconfig source security/integrity/Kconfig diff --git a/security/Makefile b/security/Makefile index c9bfbc84ff50..f2d71cdb8e19 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,6 +8,7 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo subdir-$(CONFIG_SECURITY_APPARMOR) += apparmor subdir-$(CONFIG_SECURITY_YAMA) += yama +subdir-$(CONFIG_SECURITY_LOADPIN) += loadpin # always enable default capabilities obj-y += commoncap.o @@ -22,6 +23,7 @@ obj-$(CONFIG_AUDIT) += lsm_audit.o obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor/ obj-$(CONFIG_SECURITY_YAMA) += yama/ +obj-$(CONFIG_SECURITY_LOADPIN) += loadpin/ obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o # Object integrity file lists diff --git a/security/loadpin/Kconfig b/security/loadpin/Kconfig new file mode 100644 index 000000000000..c668ac4eda65 --- /dev/null +++ b/security/loadpin/Kconfig @@ -0,0 +1,10 @@ +config SECURITY_LOADPIN + bool "Pin load of kernel files (modules, fw, etc) to one filesystem" + depends on SECURITY && BLOCK + help + Any files read through the kernel file reading interface + (kernel modules, firmware, kexec images, security policy) will + be pinned to the first filesystem used for loading. Any files + that come from other filesystems will be rejected. This is best + used on systems without an initrd that have a root filesystem + backed by a read-only device such as dm-verity or a CDROM. diff --git a/security/loadpin/Makefile b/security/loadpin/Makefile new file mode 100644 index 000000000000..c2d77f83037b --- /dev/null +++ b/security/loadpin/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SECURITY_LOADPIN) += loadpin.o diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c new file mode 100644 index 000000000000..e4debae3c4d6 --- /dev/null +++ b/security/loadpin/loadpin.c @@ -0,0 +1,190 @@ +/* + * Module and Firmware Pinning Security Module + * + * Copyright 2011-2016 Google Inc. + * + * Author: Kees Cook + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "LoadPin: " fmt + +#include +#include +#include +#include +#include +#include +#include /* current */ +#include + +static void report_load(const char *origin, struct file *file, char *operation) +{ + char *cmdline, *pathname; + + pathname = kstrdup_quotable_file(file, GFP_KERNEL); + cmdline = kstrdup_quotable_cmdline(current, GFP_KERNEL); + + pr_notice("%s %s obj=%s%s%s pid=%d cmdline=%s%s%s\n", + origin, operation, + (pathname && pathname[0] != '<') ? "\"" : "", + pathname, + (pathname && pathname[0] != '<') ? "\"" : "", + task_pid_nr(current), + cmdline ? "\"" : "", cmdline, cmdline ? "\"" : ""); + + kfree(cmdline); + kfree(pathname); +} + +static int enabled = 1; +static struct super_block *pinned_root; +static DEFINE_SPINLOCK(pinned_root_spinlock); + +#ifdef CONFIG_SYSCTL +static int zero; +static int one = 1; + +static struct ctl_path loadpin_sysctl_path[] = { + { .procname = "kernel", }, + { .procname = "loadpin", }, + { } +}; + +static struct ctl_table loadpin_sysctl_table[] = { + { + .procname = "enabled", + .data = &enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +/* + * This must be called after early kernel init, since then the rootdev + * is available. + */ +static void check_pinning_enforcement(struct super_block *mnt_sb) +{ + bool ro = false; + + /* + * If load pinning is not enforced via a read-only block + * device, allow sysctl to change modes for testing. + */ + if (mnt_sb->s_bdev) { + ro = bdev_read_only(mnt_sb->s_bdev); + pr_info("dev(%u,%u): %s\n", + MAJOR(mnt_sb->s_bdev->bd_dev), + MINOR(mnt_sb->s_bdev->bd_dev), + ro ? "read-only" : "writable"); + } else + pr_info("mnt_sb lacks block device, treating as: writable\n"); + + if (!ro) { + if (!register_sysctl_paths(loadpin_sysctl_path, + loadpin_sysctl_table)) + pr_notice("sysctl registration failed!\n"); + else + pr_info("load pinning can be disabled.\n"); + } else + pr_info("load pinning engaged.\n"); +} +#else +static void check_pinning_enforcement(struct super_block *mnt_sb) +{ + pr_info("load pinning engaged.\n"); +} +#endif + +static void loadpin_sb_free_security(struct super_block *mnt_sb) +{ + /* + * When unmounting the filesystem we were using for load + * pinning, we acknowledge the superblock release, but make sure + * no other modules or firmware can be loaded. + */ + if (!IS_ERR_OR_NULL(pinned_root) && mnt_sb == pinned_root) { + pinned_root = ERR_PTR(-EIO); + pr_info("umount pinned fs: refusing further loads\n"); + } +} + +static int loadpin_read_file(struct file *file, enum kernel_read_file_id id) +{ + struct super_block *load_root; + const char *origin = kernel_read_file_id_str(id); + + /* This handles the older init_module API that has a NULL file. */ + if (!file) { + if (!enabled) { + report_load(origin, NULL, "old-api-pinning-ignored"); + return 0; + } + + report_load(origin, NULL, "old-api-denied"); + return -EPERM; + } + + load_root = file->f_path.mnt->mnt_sb; + + /* First loaded module/firmware defines the root for all others. */ + spin_lock(&pinned_root_spinlock); + /* + * pinned_root is only NULL at startup. Otherwise, it is either + * a valid reference, or an ERR_PTR. + */ + if (!pinned_root) { + pinned_root = load_root; + /* + * Unlock now since it's only pinned_root we care about. + * In the worst case, we will (correctly) report pinning + * failures before we have announced that pinning is + * enabled. This would be purely cosmetic. + */ + spin_unlock(&pinned_root_spinlock); + check_pinning_enforcement(pinned_root); + report_load(origin, file, "pinned"); + } else { + spin_unlock(&pinned_root_spinlock); + } + + if (IS_ERR_OR_NULL(pinned_root) || load_root != pinned_root) { + if (unlikely(!enabled)) { + report_load(origin, file, "pinning-ignored"); + return 0; + } + + report_load(origin, file, "denied"); + return -EPERM; + } + + return 0; +} + +static struct security_hook_list loadpin_hooks[] = { + LSM_HOOK_INIT(sb_free_security, loadpin_sb_free_security), + LSM_HOOK_INIT(kernel_read_file, loadpin_read_file), +}; + +void __init loadpin_add_hooks(void) +{ + pr_info("ready to pin (currently %sabled)", enabled ? "en" : "dis"); + security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks)); +} + +/* Should not be mutable after boot, so not listed in sysfs (perm == 0). */ +module_param(enabled, int, 0); +MODULE_PARM_DESC(enabled, "Pin module/firmware loading (default: true)"); diff --git a/security/security.c b/security/security.c index 554c3fb7d4a5..e42860899f23 100644 --- a/security/security.c +++ b/security/security.c @@ -60,6 +60,7 @@ int __init security_init(void) */ capability_add_hooks(); yama_add_hooks(); + loadpin_add_hooks(); /* * Load all the remaining security modules. -- cgit v1.2.3 From 85b67bcb7e4a23ced05e7020bf5843b9857f6881 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 18 Apr 2016 20:11:50 -0700 Subject: perf, bpf: minimize the size of perf_trace_() tracepoint handler move trace_call_bpf() into helper function to minimize the size of perf_trace_*() tracepoint handlers. text data bss dec hex filename 10541679 5526646 2945024 19013349 1221ee5 vmlinux_before 10509422 5526646 2945024 18981092 121a0e4 vmlinux_after It may seem that perf_fetch_caller_regs() can also be moved, but that is incorrect, since ip/sp will be wrong. bpf+tracepoint performance is not affected, since perf_swevent_put_recursion_context() is now inlined. export_symbol_gpl can also be dropped. No measurable change in normal perf tracepoints. Suggested-by: Steven Rostedt Signed-off-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Signed-off-by: David S. Miller --- include/linux/trace_events.h | 5 +++++ include/trace/perf.h | 13 +++---------- kernel/events/core.c | 20 +++++++++++++++++++- 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index fe6441203b59..222f6aa0418f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -609,6 +609,11 @@ extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); +void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, + struct trace_event_call *call, u64 count, + struct pt_regs *regs, struct hlist_head *head, + struct task_struct *task); + static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, diff --git a/include/trace/perf.h b/include/trace/perf.h index a182306eefd7..88de5c205e86 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -64,16 +64,9 @@ perf_trace_##call(void *__data, proto) \ \ { assign; } \ \ - if (prog) { \ - *(struct pt_regs **)entry = __regs; \ - if (!trace_call_bpf(prog, entry) || hlist_empty(head)) { \ - perf_swevent_put_recursion_context(rctx); \ - return; \ - } \ - } \ - perf_trace_buf_submit(entry, __entry_size, rctx, \ - event_call->event.type, __count, __regs, \ - head, __task); \ + perf_trace_run_bpf_submit(entry, __entry_size, rctx, \ + event_call, __count, __regs, \ + head, __task); \ } /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 5056abffef27..9eb23dc27462 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6741,7 +6741,6 @@ void perf_swevent_put_recursion_context(int rctx) put_recursion_context(swhash->recursion, rctx); } -EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { @@ -6998,6 +6997,25 @@ static int perf_tp_event_match(struct perf_event *event, return 1; } +void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, + struct trace_event_call *call, u64 count, + struct pt_regs *regs, struct hlist_head *head, + struct task_struct *task) +{ + struct bpf_prog *prog = call->prog; + + if (prog) { + *(struct pt_regs **)raw_data = regs; + if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + perf_swevent_put_recursion_context(rctx); + return; + } + } + perf_tp_event(call->event.type, count, raw_data, size, regs, head, + rctx, task); +} +EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); + void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task) -- cgit v1.2.3 From b1c20f0b97b4e565fa50cde1e6b44c2fd327a1e0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 19 Apr 2016 14:02:19 -0400 Subject: netdev_features: Fold NETIF_F_ALL_TSO into NETIF_F_GSO_SOFTWARE This patch folds NETIF_F_ALL_TSO into the bitmask for NETIF_F_GSO_SOFTWARE. The idea is to avoid duplication of defines since the only difference between the two was the GSO_UDP bit. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 15eb0b12fff9..bc8736266749 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -152,11 +152,6 @@ enum { #define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) -/* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ - NETIF_F_TSO_MANGLEID | \ - NETIF_F_TSO6 | NETIF_F_UFO) - /* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory @@ -170,6 +165,9 @@ enum { #define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ NETIF_F_FSO) +/* List of features with software fallbacks. */ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) + /* * If one device supports one of these features, then enable them * for all in netdev_increment_features. -- cgit v1.2.3 From 237cd218099ce96edf2890a49aa191b38b84c2fc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:09 +0300 Subject: net/mlx5: Introduce device queue counters A queue counter can collect several statistics for one or more hardware queues (QPs, RQs, etc ..) that the counter is attached to. For Ethernet it will provide an "out of buffer" counter which collects the number of all packets that are dropped due to lack of software buffers. Here we add device commands to alloc/query/dealloc queue counters. Signed-off-by: Tariq Toukan Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 68 ++++++++++++++++++++++++++++ include/linux/mlx5/qp.h | 6 +++ 2 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index def289375ecb..b720a274220d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -538,3 +538,71 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); + +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); + +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); + +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, clear, reset); + MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer) +{ + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); + if (!err) + *out_of_buffer = MLX5_GET(query_q_counter_out, out, + out_of_buffer); + + kfree(out); + return err; +} diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index cf031a3f16c5..64221027bf1f 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -668,6 +668,12 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq); +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id); +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size); +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3 From 461017cb006aa1b39b0f647ae0ee2d9d84eef05b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:13 +0300 Subject: net/mlx5e: Support RX multi-packet WQE (Striding RQ) Introduce the feature of multi-packet WQE (RX Work Queue Element) referred to as (MPWQE or Striding RQ), in which WQEs are larger and serve multiple packets each. Every WQE consists of many strides of the same size, every received packet is aligned to a beginning of a stride and is written to consecutive strides within a WQE. In the regular approach, each regular WQE is big enough to be capable of serving one received packet of any size up to MTU or 64K in case of device LRO is enabled, making it very wasteful when dealing with small packets or device LRO is enabled. For its flexibility, MPWQE allows a better memory utilization (implying improvements in CPU utilization and packet rate) as packets consume strides according to their size, preserving the rest of the WQE to be available for other packets. MPWQE default configuration: Num of WQEs = 16 Strides Per WQE = 2048 Stride Size = 64 byte The default WQEs memory footprint went from 1024*mtu (~1.5MB) to 16 * 2048 * 64 = 2MB per ring. However, HW LRO can now be supported at no additional cost in memory footprint, and hence we turn it on by default and get an even better performance. Performance tested on ConnectX4-Lx 50G. To isolate the feature under test, the numbers below were measured with HW LRO turned off. We verified that the performance just improves when LRO is turned back on. * Netperf single TCP stream: - BW raised by 10-15% for representative packet sizes: default, 64B, 1024B, 1478B, 65536B. * Netperf multi TCP stream: - No degradation, line rate reached. * Pktgen: packet rate raised by 2-10% for traffic of different message sizes: 64B, 128B, 256B, 1024B, and 1500B. * Pktgen: packet loss in bursts of small messages (64byte), single stream: - | num packets | packets loss before | packets loss after | 2K | ~ 1K | 0 | 8K | ~ 6K | 0 | 16K | ~13K | 0 | 32K | ~28K | 0 | 64K | ~57K | ~24K As expected as the driver can receive as many small packets (<=64B) as the number of total strides in the ring (default = 2048 * 16) vs. 1024 (default ring size regardless of packets size) before this feature. Signed-off-by: Tariq Toukan Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 77 ++++++++++- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 15 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 109 +++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 153 +++++++++++++++++++-- include/linux/mlx5/device.h | 39 +++++- 5 files changed, 349 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 61e249d8d7f8..f519148d7dcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,12 +57,30 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 + +#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES) +#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE) +#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES +\ + MLX5_MPWRQ_LOG_STRIDE_SIZE) +#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ + MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) +#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ + MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) + #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) @@ -74,6 +92,38 @@ #define MLX5E_NUM_MAIN_GROUPS 9 #define MLX5E_NET_IP_ALIGN 2 +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +static inline int mlx5_min_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + } +} + +static inline int mlx5_max_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + } +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -128,6 +178,7 @@ static const char vport_strings[][ETH_GSTRING_LEN] = { "tx_queue_wake", "tx_queue_dropped", "rx_wqe_err", + "rx_mpwqe_filler", }; struct mlx5e_vport_stats { @@ -169,8 +220,9 @@ struct mlx5e_vport_stats { u64 tx_queue_wake; u64 tx_queue_dropped; u64 rx_wqe_err; + u64 rx_mpwqe_filler; -#define NUM_VPORT_COUNTERS 35 +#define NUM_VPORT_COUNTERS 36 }; static const char pport_strings[][ETH_GSTRING_LEN] = { @@ -263,7 +315,8 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "csum_sw", "lro_packets", "lro_bytes", - "wqe_err" + "wqe_err", + "mpwqe_filler", }; struct mlx5e_rq_stats { @@ -274,7 +327,8 @@ struct mlx5e_rq_stats { u64 lro_packets; u64 lro_bytes; u64 wqe_err; -#define NUM_RQ_STATS 7 + u64 mpwqe_filler; +#define NUM_RQ_STATS 8 }; static const char sq_stats_strings[][ETH_GSTRING_LEN] = { @@ -318,6 +372,7 @@ struct mlx5e_stats { struct mlx5e_params { u8 log_sq_size; + u8 rq_wq_type; u8 log_rq_size; u16 num_channels; u8 num_tc; @@ -374,11 +429,23 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +struct mlx5e_dma_info { + struct page *page; + dma_addr_t addr; +}; + +struct mlx5e_mpw_info { + struct mlx5e_dma_info dma_info; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; u32 wqe_sz; struct sk_buff **skb; + struct mlx5e_mpw_info *wqe_info; struct device *pdev; struct net_device *netdev; @@ -393,6 +460,7 @@ struct mlx5e_rq { /* control */ struct mlx5_wq_ctrl wq_ctrl; + u8 wq_type; u32 rqn; struct mlx5e_channel *channel; struct mlx5e_priv *priv; @@ -649,9 +717,12 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_update_stats(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6f40ba448f07..4077856aab76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -273,8 +273,9 @@ static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->params.log_rq_size; param->tx_pending = 1 << priv->params.log_sq_size; @@ -285,6 +286,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; + int rq_wq_type = priv->params.rq_wq_type; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; @@ -300,16 +302,16 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + 1 << mlx5_min_log_rq_size(rq_wq_type)); return -EINVAL; } - if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + 1 << mlx5_max_log_rq_size(rq_wq_type)); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { @@ -327,8 +329,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(param->rx_pending); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = min_t(u16, param->rx_pending - 1, - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 23ba12c3a738..871f3af204dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -175,6 +175,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_none = 0; s->rx_csum_sw = 0; s->rx_wqe_err = 0; + s->rx_mpwqe_filler = 0; for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -185,6 +186,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_sw += rq_stats->csum_sw; s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -323,6 +325,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 byte_count; int wq_sz; int err; int i; @@ -337,28 +340,47 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; + rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + + rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE; + byte_count = rq->wqe_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; + + rq->wqe_sz = (priv->params.lro_en) ? + priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; + byte_count |= MLX5_HW_START_PADDING; + } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = - cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + wqe->data.byte_count = cpu_to_be32(byte_count); } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; - rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = &priv->tstamp; @@ -376,7 +398,14 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - kfree(rq->skb); + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kfree(rq->wqe_info); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + kfree(rq->skb); + } + mlx5_wq_destroy(&rq->wq_ctrl); } @@ -1065,7 +1094,18 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + MLX5_MPWRQ_LOG_NUM_STRIDES - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, + MLX5_MPWRQ_LOG_STRIDE_SIZE - 6); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + } + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); @@ -1111,8 +1151,18 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; + u8 log_cq_size; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = priv->params.log_rq_size + + MLX5_MPWRQ_LOG_NUM_STRIDES; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + log_cq_size = priv->params.log_rq_size; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); mlx5e_build_common_cq_param(priv, param); } @@ -1983,7 +2033,8 @@ static int mlx5e_set_features(struct net_device *netdev, if (changes & NETIF_F_LRO) { bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) + if (was_opened && (priv->params.rq_wq_type == + MLX5_WQ_TYPE_LINKED_LIST)) mlx5e_close_locked(priv->netdev); priv->params.lro_en = !!(features & NETIF_F_LRO); @@ -1992,7 +2043,8 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", err); - if (was_opened) + if (was_opened && (priv->params.rq_wq_type == + MLX5_WQ_TYPE_LINKED_LIST)) err = mlx5e_open_locked(priv->netdev); } @@ -2327,8 +2379,21 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.log_rq_size = - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rq_wq_type = MLX5_CAP_GEN(mdev, striding_rq) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.lro_en = true; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); priv->params.rx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_pkts = @@ -2338,8 +2403,6 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.min_rx_wqes = - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d7cccedddf34..71f3a5d244ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -76,6 +76,41 @@ err_free_skb: return -ENOMEM; } +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + gfp_t gfp_mask; + int i; + + gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; + wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + MLX5_MPWRQ_WQE_PAGE_ORDER); + if (unlikely(!wi->dma_info.page)) + return -ENOMEM; + + wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, + rq->wqe_sz, PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { + put_page(wi->dma_info.page); + return -ENOMEM; + } + + /* We split the high-order page into order-0 ones and manage their + * reference counter to minimize the memory held by small skb fragments + */ + split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->dma_info.page[i]._count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + + return 0; +} + bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; @@ -100,7 +135,8 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !mlx5_wq_ll_is_full(wq); } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); @@ -111,7 +147,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETH_HLEN; + u16 tot_len = cqe_bcnt - ETH_HLEN; if (eth->h_proto == htons(ETH_P_IP)) { tcp = (struct tcphdr *)(skb->data + ETH_HLEN + @@ -191,19 +227,17 @@ csum_none: } static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; - skb_put(skb, cqe_bcnt); - lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe); + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; @@ -228,12 +262,24 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; } +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + napi_gro_receive(rq->cq.napi, skb); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; u16 wqe_counter; + u32 cqe_bcnt; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); @@ -253,16 +299,103 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } - mlx5e_build_rx_skb(cqe, rq, skb); - rq->stats.packets++; - rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); - napi_gro_receive(rq->cq.napi, skb); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_put(skb, cqe_bcnt); + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); + struct sk_buff *skb; + u32 consumed_bytes; + u32 head_offset; + u32 frag_offset; + u32 wqe_offset; + u32 page_idx; + u16 byte_cnt; + u16 cqe_bcnt; + u16 headlen; + int i; + + wi->consumed_strides += cstrides; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + rq->stats.mpwqe_filler++; + goto mpwrq_cqe_out; + } + + skb = netdev_alloc_skb(rq->netdev, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, + sizeof(long))); + if (unlikely(!skb)) + goto mpwrq_cqe_out; + + prefetch(skb->data); + wqe_offset = stride_ix * MLX5_MPWRQ_STRIDE_SIZE; + consumed_bytes = cstrides * MLX5_MPWRQ_STRIDE_SIZE; + dma_sync_single_for_cpu(rq->pdev, wi->dma_info.addr + wqe_offset, + consumed_bytes, DMA_FROM_DEVICE); + + head_offset = wqe_offset & (PAGE_SIZE - 1); + page_idx = wqe_offset >> PAGE_SHIFT; + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + frag_offset = head_offset + headlen; + + byte_cnt = cqe_bcnt - headlen; + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + unsigned int truesize = + ALIGN(pg_consumed_bytes, MLX5_MPWRQ_STRIDE_SIZE); + + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + &wi->dma_info.page[page_idx], frag_offset, + pg_consumed_bytes, truesize); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + page_idx++; + } + + skb_copy_to_linear_data(skb, + page_address(wi->dma_info.page) + wqe_offset, + ALIGN(headlen, sizeof(long))); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES)) + return; + + dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, + PCI_DMA_FROMDEVICE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], + &wi->dma_info.page[i]._count); + put_page(&wi->dma_info.page[i]); + } + mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..03f8d719b680 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -644,7 +644,8 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[4]; + u8 rsvd0[2]; + __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; @@ -696,6 +697,42 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +struct mpwrq_cqe_bc { + __be16 filler_consumed_strides; + __be16 byte_cnt; +}; + +static inline u16 mpwrq_get_cqe_byte_cnt(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return be16_to_cpu(bc->byte_cnt); +} + +static inline u16 mpwrq_get_cqe_bc_consumed_strides(struct mpwrq_cqe_bc *bc) +{ + return 0x7fff & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_consumed_strides(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return mpwrq_get_cqe_bc_consumed_strides(bc); +} + +static inline bool mpwrq_is_filler_cqe(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return 0x8000 & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_stride_index(struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->wqe_counter); +} + enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, -- cgit v1.2.3 From b7aade15485a660cbf5161962c284131324a9534 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 18 Apr 2016 21:19:47 +0200 Subject: vxlan: break dependency with netdev drivers Currently all drivers depend and autoload the vxlan module because how vxlan_get_rx_port is linked into them. Remove this dependency: By using a new event type in the netdevice notifier call chain we proxy the request from the drivers to flush and resetup the vxlan ports not directly via function call but by the already existing netdevice notifier call chain. I added a separate new event type, NETDEV_OFFLOAD_PUSH_VXLAN, to do so. We don't need to save those ids, as the event type field is an unsigned long and using specialized event types for this purpose seemed to be a more elegant way. This also comes in beneficial if in future we want to add offloading knobs for vxlan. Cc: Jesse Gross Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 14 +++++++++----- include/linux/netdevice.h | 1 + include/net/vxlan.h | 6 ++---- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c2e22c2532a1..6fb93b57a724 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2527,7 +2527,7 @@ static struct device_type vxlan_type = { * supply the listening VXLAN udp ports. Callers are expected * to implement the ndo_add_vxlan_port. */ -void vxlan_get_rx_port(struct net_device *dev) +static void vxlan_push_rx_ports(struct net_device *dev) { struct vxlan_sock *vs; struct net *net = dev_net(dev); @@ -2536,6 +2536,9 @@ void vxlan_get_rx_port(struct net_device *dev) __be16 port; unsigned int i; + if (!dev->netdev_ops->ndo_add_vxlan_port) + return; + spin_lock(&vn->sock_lock); for (i = 0; i < PORT_HASH_SIZE; ++i) { hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { @@ -2547,7 +2550,6 @@ void vxlan_get_rx_port(struct net_device *dev) } spin_unlock(&vn->sock_lock); } -EXPORT_SYMBOL_GPL(vxlan_get_rx_port); /* Initialize the device structure. */ static void vxlan_setup(struct net_device *dev) @@ -3283,20 +3285,22 @@ static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, unregister_netdevice_many(&list_kill); } -static int vxlan_lowerdev_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int vxlan_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); if (event == NETDEV_UNREGISTER) vxlan_handle_lowerdev_unregister(vn, dev); + else if (event == NETDEV_OFFLOAD_PUSH_VXLAN) + vxlan_push_rx_ports(dev); return NOTIFY_DONE; } static struct notifier_block vxlan_notifier_block __read_mostly = { - .notifier_call = vxlan_lowerdev_event, + .notifier_call = vxlan_netdevice_event, }; static __net_init int vxlan_init_net(struct net *net) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3bb534576a3..d4c8cd424f8d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2244,6 +2244,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B +#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index d442eb3129cd..673e9f9e6da7 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -390,13 +390,11 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) return vni_field; } -#if IS_ENABLED(CONFIG_VXLAN) -void vxlan_get_rx_port(struct net_device *netdev); -#else static inline void vxlan_get_rx_port(struct net_device *netdev) { + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_VXLAN, netdev); } -#endif static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { -- cgit v1.2.3 From 681e683ff30ada19f73c17c38a528528dd8824f1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 18 Apr 2016 21:19:48 +0200 Subject: geneve: break dependency with netdev drivers Equivalent to "vxlan: break dependency with netdev drivers", don't autoload geneve module in case the driver is loaded. Instead make the coupling weaker by using netdevice notifiers as proxy. Cc: Jesse Gross Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/geneve.c | 31 ++++++++++++++++++++++++++++--- include/linux/netdevice.h | 1 + include/net/geneve.h | 6 ++---- 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 512dbe013713..9c40b88fabd5 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1172,7 +1172,7 @@ static struct device_type geneve_type = { * supply the listening GENEVE udp ports. Callers are expected * to implement the ndo_add_geneve_port. */ -void geneve_get_rx_port(struct net_device *dev) +static void geneve_push_rx_ports(struct net_device *dev) { struct net *net = dev_net(dev); struct geneve_net *gn = net_generic(net, geneve_net_id); @@ -1181,6 +1181,9 @@ void geneve_get_rx_port(struct net_device *dev) struct sock *sk; __be16 port; + if (!dev->netdev_ops->ndo_add_geneve_port) + return; + rcu_read_lock(); list_for_each_entry_rcu(gs, &gn->sock_list, list) { sk = gs->sock->sk; @@ -1190,7 +1193,6 @@ void geneve_get_rx_port(struct net_device *dev) } rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(geneve_get_rx_port); /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) @@ -1538,6 +1540,21 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); +static int geneve_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event == NETDEV_OFFLOAD_PUSH_GENEVE) + geneve_push_rx_ports(dev); + + return NOTIFY_DONE; +} + +static struct notifier_block geneve_notifier_block __read_mostly = { + .notifier_call = geneve_netdevice_event, +}; + static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); @@ -1590,11 +1607,18 @@ static int __init geneve_init_module(void) if (rc) goto out1; - rc = rtnl_link_register(&geneve_link_ops); + rc = register_netdevice_notifier(&geneve_notifier_block); if (rc) goto out2; + rc = rtnl_link_register(&geneve_link_ops); + if (rc) + goto out3; + return 0; + +out3: + unregister_netdevice_notifier(&geneve_notifier_block); out2: unregister_pernet_subsys(&geneve_net_ops); out1: @@ -1605,6 +1629,7 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { rtnl_link_unregister(&geneve_link_ops); + unregister_netdevice_notifier(&geneve_notifier_block); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4c8cd424f8d..1f6d5db471a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2245,6 +2245,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C +#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/geneve.h b/include/net/geneve.h index e6c23dc765f7..cb544a530146 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,13 +62,11 @@ struct genevehdr { struct geneve_opt options[]; }; -#if IS_ENABLED(CONFIG_GENEVE) -void geneve_get_rx_port(struct net_device *netdev); -#else static inline void geneve_get_rx_port(struct net_device *netdev) { + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_GENEVE, netdev); } -#endif #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, -- cgit v1.2.3 From 92a39d9043ba5ff98adb1c31491f00c7bea5466e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 23 Mar 2016 17:38:24 +0100 Subject: clk: composite: Add unregister function The composite clock didn't have any unregistration function, which forced us to use clk_unregister directly on it. While it was already not great from an API point of view, it also meant that we were leaking the clk_composite structure allocated in clk_register_composite. Add a clk_unregister_composite function to fix this. Signed-off-by: Maxime Ripard Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 15 +++++++++++++++ include/linux/clk-provider.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 1f903e1f86a2..b0f3b84ebd13 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -286,3 +286,18 @@ err: kfree(composite); return clk; } + +void clk_unregister_composite(struct clk *clk) +{ + struct clk_composite *composite; + struct clk_hw *hw; + + hw = __clk_get_hw(clk); + if (!hw) + return; + + composite = to_clk_composite(hw); + + clk_unregister(clk); + kfree(composite); +} diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index da95258127aa..26a8c9b7be71 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -603,6 +603,7 @@ struct clk *clk_register_composite(struct device *dev, const char *name, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +void clk_unregister_composite(struct clk *clk); /*** * struct clk_gpio_gate - gpio gated clock -- cgit v1.2.3 From 372a12ed9d99c02f105278a9b75f0cb176d15cc1 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 8 Apr 2016 13:40:53 +0200 Subject: PM / Runtime: Move ignore_children flag under CONFIG_PM The ignore_children flag is used only when CONFIG_PM is set, so let's move it into that section within the struct dev_pm_info. Move also the corresponding pm_suspend_ignore_children() API out of device.h into pm_runtime.h, to be consistent with similar APIs. Unfortunate this causes the Toshiba PCI SD mmc host driver to fail to compile as it needs pm_runtime.h, so let's fix this here as well. Signed-off-by: Ulf Hansson Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/mmc/host/toshsd.c | 1 + include/linux/device.h | 5 ----- include/linux/pm.h | 2 +- include/linux/pm_runtime.h | 6 ++++++ 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/toshsd.c b/drivers/mmc/host/toshsd.c index e2cdd5fb1423..553ef41bb806 100644 --- a/drivers/mmc/host/toshsd.c +++ b/drivers/mmc/host/toshsd.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/device.h b/include/linux/device.h index 002c59728dbe..b130304f9b1b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -956,11 +956,6 @@ static inline bool device_async_suspend_enabled(struct device *dev) return !!dev->power.async_suspend; } -static inline void pm_suspend_ignore_children(struct device *dev, bool enable) -{ - dev->power.ignore_children = enable; -} - static inline void dev_pm_syscore_device(struct device *dev, bool val) { #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/pm.h b/include/linux/pm.h index 6a5d654f4447..06eb353182ab 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -563,7 +563,6 @@ struct dev_pm_info { bool is_suspended:1; /* Ditto */ bool is_noirq_suspended:1; bool is_late_suspended:1; - bool ignore_children:1; bool early_init:1; /* Owned by the PM core */ bool direct_complete:1; /* Owned by the PM core */ spinlock_t lock; @@ -591,6 +590,7 @@ struct dev_pm_info { unsigned int deferred_resume:1; unsigned int run_wake:1; unsigned int runtime_auto:1; + bool ignore_children:1; unsigned int no_callbacks:1; unsigned int irq_safe:1; unsigned int use_autosuspend:1; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7af093d6a4dd..2e14d2667b6c 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -56,6 +56,11 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) +{ + dev->power.ignore_children = enable; +} + static inline bool pm_children_suspended(struct device *dev) { return dev->power.ignore_children @@ -156,6 +161,7 @@ static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline bool pm_children_suspended(struct device *dev) { return false; } static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} -- cgit v1.2.3 From 9df3921e026532eb3bd2904745d990c0a9f0b4e4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 31 Mar 2016 11:21:25 +0200 Subject: PM / Domains: Rename stop_ok to suspend_ok for the genpd governor The genpd governor validates the latency constraints to find out whether it's acceptable to runtime suspend a device. Earlier this validation was made to know whether it was okay to invoke the ->stop() callback for the device, hence the governor used the name "stop_ok" for the related variables. To clarify the code around this, let's rename these variables from "stop_ok" to "suspend_ok". Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 6 +++--- drivers/base/power/domain_governor.c | 20 ++++++++++---------- include/linux/pm_domain.h | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 56705b52758e..c62687d9fa5a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -382,7 +382,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; - bool (*stop_ok)(struct device *__dev); + bool (*suspend_ok)(struct device *__dev); struct gpd_timing_data *td = &dev_gpd_data(dev)->td; bool runtime_pm = pm_runtime_enabled(dev); ktime_t time_start; @@ -401,8 +401,8 @@ static int pm_genpd_runtime_suspend(struct device *dev) * runtime PM is disabled. Under these circumstances, we shall skip * validating/measuring the PM QoS latency. */ - stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; - if (runtime_pm && stop_ok && !stop_ok(dev)) + suspend_ok = genpd->gov ? genpd->gov->suspend_ok : NULL; + if (runtime_pm && suspend_ok && !suspend_ok(dev)) return -EBUSY; /* Measure suspend latency. */ diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 00a5436dd44b..2e0fce711135 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -37,10 +37,10 @@ static int dev_update_qos_constraint(struct device *dev, void *data) } /** - * default_stop_ok - Default PM domain governor routine for stopping devices. + * default_suspend_ok - Default PM domain governor routine to suspend devices. * @dev: Device to check. */ -static bool default_stop_ok(struct device *dev) +static bool default_suspend_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; unsigned long flags; @@ -51,13 +51,13 @@ static bool default_stop_ok(struct device *dev) spin_lock_irqsave(&dev->power.lock, flags); if (!td->constraint_changed) { - bool ret = td->cached_stop_ok; + bool ret = td->cached_suspend_ok; spin_unlock_irqrestore(&dev->power.lock, flags); return ret; } td->constraint_changed = false; - td->cached_stop_ok = false; + td->cached_suspend_ok = false; td->effective_constraint_ns = -1; constraint_ns = __dev_pm_qos_read_value(dev); @@ -83,13 +83,13 @@ static bool default_stop_ok(struct device *dev) return false; } td->effective_constraint_ns = constraint_ns; - td->cached_stop_ok = constraint_ns >= 0; + td->cached_suspend_ok = constraint_ns >= 0; /* * The children have been suspended already, so we don't need to take - * their stop latencies into account here. + * their suspend latencies into account here. */ - return td->cached_stop_ok; + return td->cached_suspend_ok; } /** @@ -150,7 +150,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, */ td = &to_gpd_data(pdd)->td; constraint_ns = td->effective_constraint_ns; - /* default_stop_ok() need not be called before us. */ + /* default_suspend_ok() need not be called before us. */ if (constraint_ns < 0) { constraint_ns = dev_pm_qos_read_value(pdd->dev); constraint_ns *= NSEC_PER_USEC; @@ -227,7 +227,7 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) } struct dev_power_governor simple_qos_governor = { - .stop_ok = default_stop_ok, + .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, }; @@ -236,5 +236,5 @@ struct dev_power_governor simple_qos_governor = { */ struct dev_power_governor pm_domain_always_on_gov = { .power_down_ok = always_on_power_down_ok, - .stop_ok = default_stop_ok, + .suspend_ok = default_suspend_ok, }; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 49cd8890b873..e91393954384 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -28,7 +28,7 @@ enum gpd_status { struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); - bool (*stop_ok)(struct device *dev); + bool (*suspend_ok)(struct device *dev); }; struct gpd_dev_ops { @@ -94,7 +94,7 @@ struct gpd_timing_data { s64 resume_latency_ns; s64 effective_constraint_ns; bool constraint_changed; - bool cached_stop_ok; + bool cached_suspend_ok; }; struct pm_domain_data { -- cgit v1.2.3 From 54eeddbf92d0de297d78f7419dde00079d553dec Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 31 Mar 2016 11:21:27 +0200 Subject: PM / Domains: Remove ->save|restore_state() callbacks As a part of the ongoing consolidation of genpd, it's become questionable whether clients actually needs to be able to assign their own set of ->save|restore_state() callbacks. Currently all users copes fine with the default callbacks, so let's remove the configuration option and stick to the default ones. This enables further clarifications of the related code and let's also rename pm_genpd_default_save|restore_state() into __genpd_runtime_suspend|resume() to apply the rule of static functionnames in genpd. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 113 +++++++++++++++++++------------------------- include/linux/pm_domain.h | 2 - 2 files changed, 49 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 75b994a63e71..4ce4ce0a2730 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -229,17 +229,6 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth) return ret; } -static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, save_state, dev); -} - -static int genpd_restore_dev(struct generic_pm_domain *genpd, - struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); -} - static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, unsigned long val, void *ptr) { @@ -371,6 +360,52 @@ static void genpd_power_off_work_fn(struct work_struct *work) mutex_unlock(&genpd->lock); } +/** + * __genpd_runtime_suspend - walk the hierarchy of ->runtime_suspend() callbacks + * @dev: Device to handle. + */ +static int __genpd_runtime_suspend(struct device *dev) +{ + int (*cb)(struct device *__dev); + + if (dev->type && dev->type->pm) + cb = dev->type->pm->runtime_suspend; + else if (dev->class && dev->class->pm) + cb = dev->class->pm->runtime_suspend; + else if (dev->bus && dev->bus->pm) + cb = dev->bus->pm->runtime_suspend; + else + cb = NULL; + + if (!cb && dev->driver && dev->driver->pm) + cb = dev->driver->pm->runtime_suspend; + + return cb ? cb(dev) : 0; +} + +/** + * __genpd_runtime_resume - walk the hierarchy of ->runtime_resume() callbacks + * @dev: Device to handle. + */ +static int __genpd_runtime_resume(struct device *dev) +{ + int (*cb)(struct device *__dev); + + if (dev->type && dev->type->pm) + cb = dev->type->pm->runtime_resume; + else if (dev->class && dev->class->pm) + cb = dev->class->pm->runtime_resume; + else if (dev->bus && dev->bus->pm) + cb = dev->bus->pm->runtime_resume; + else + cb = NULL; + + if (!cb && dev->driver && dev->driver->pm) + cb = dev->driver->pm->runtime_resume; + + return cb ? cb(dev) : 0; +} + /** * genpd_runtime_suspend - Suspend a device belonging to I/O PM domain. * @dev: Device to suspend. @@ -409,13 +444,13 @@ static int genpd_runtime_suspend(struct device *dev) if (runtime_pm) time_start = ktime_get(); - ret = genpd_save_dev(genpd, dev); + ret = __genpd_runtime_suspend(dev); if (ret) return ret; ret = genpd_stop_dev(genpd, dev); if (ret) { - genpd_restore_dev(genpd, dev); + __genpd_runtime_resume(dev); return ret; } @@ -491,7 +526,7 @@ static int genpd_runtime_resume(struct device *dev) if (ret) goto err_poweroff; - ret = genpd_restore_dev(genpd, dev); + ret = __genpd_runtime_resume(dev); if (ret) goto err_stop; @@ -1427,54 +1462,6 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); -/* Default device callbacks for generic PM domains. */ - -/** - * pm_genpd_default_save_state - Default "save device state" for PM domains. - * @dev: Device to handle. - */ -static int pm_genpd_default_save_state(struct device *dev) -{ - int (*cb)(struct device *__dev); - - if (dev->type && dev->type->pm) - cb = dev->type->pm->runtime_suspend; - else if (dev->class && dev->class->pm) - cb = dev->class->pm->runtime_suspend; - else if (dev->bus && dev->bus->pm) - cb = dev->bus->pm->runtime_suspend; - else - cb = NULL; - - if (!cb && dev->driver && dev->driver->pm) - cb = dev->driver->pm->runtime_suspend; - - return cb ? cb(dev) : 0; -} - -/** - * pm_genpd_default_restore_state - Default PM domains "restore device state". - * @dev: Device to handle. - */ -static int pm_genpd_default_restore_state(struct device *dev) -{ - int (*cb)(struct device *__dev); - - if (dev->type && dev->type->pm) - cb = dev->type->pm->runtime_resume; - else if (dev->class && dev->class->pm) - cb = dev->class->pm->runtime_resume; - else if (dev->bus && dev->bus->pm) - cb = dev->bus->pm->runtime_resume; - else - cb = NULL; - - if (!cb && dev->driver && dev->driver->pm) - cb = dev->driver->pm->runtime_resume; - - return cb ? cb(dev) : 0; -} - /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. @@ -1520,8 +1507,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.restore_early = pm_genpd_resume_early; genpd->domain.ops.restore = pm_genpd_resume; genpd->domain.ops.complete = pm_genpd_complete; - genpd->dev_ops.save_state = pm_genpd_default_save_state; - genpd->dev_ops.restore_state = pm_genpd_default_restore_state; if (genpd->flags & GENPD_FLAG_PM_CLK) { genpd->dev_ops.stop = pm_clk_suspend; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index e91393954384..39285c7bd3f5 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -34,8 +34,6 @@ struct dev_power_governor { struct gpd_dev_ops { int (*start)(struct device *dev); int (*stop)(struct device *dev); - int (*save_state)(struct device *dev); - int (*restore_state)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From 916633a403702549d37ea353e63a68e5b0dc27ad Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:31 +0200 Subject: locking/rwsem: Provide down_write_killable() Now that all the architectures implement the necessary glue code we can introduce down_write_killable(). The only difference wrt. regular down_write() is that the slow path waits in TASK_KILLABLE state and the interruption by the fatal signal is reported as -EINTR to the caller. Signed-off-by: Michal Hocko Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-12-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/rwsem.h | 6 +++--- include/linux/lockdep.h | 15 +++++++++++++++ include/linux/rwsem.h | 1 + kernel/locking/rwsem.c | 19 +++++++++++++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index d759c5f70f49..453744c1d347 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -102,9 +102,9 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) #define ____down_write(sem, slow_path) \ ({ \ long tmp; \ - struct rw_semaphore* ret = sem; \ + struct rw_semaphore* ret; \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%2)\n\t" \ + LOCK_PREFIX " xadd %1,(%3)\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -112,7 +112,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "+a" (ret) \ + : "+m" (sem->count), "=d" (tmp), "=a" (ret) \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..accfe56d8c51 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -444,6 +444,18 @@ do { \ lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) +#define LOCK_CONTENDED_RETURN(_lock, try, lock) \ +({ \ + int ____err = 0; \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + ____err = lock(_lock); \ + } \ + if (!____err) \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ + ____err; \ +}) + #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) @@ -452,6 +464,9 @@ do { \ #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) +#define LOCK_CONTENDED_RETURN(_lock, try, lock) \ + lock(_lock) + #endif /* CONFIG_LOCK_STAT */ #ifdef CONFIG_LOCKDEP diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7d7ae029dac5..d1c12d160ace 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -118,6 +118,7 @@ extern int down_read_trylock(struct rw_semaphore *sem); * lock for writing */ extern void down_write(struct rw_semaphore *sem); +extern int __must_check down_write_killable(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 205be0ce34de..c817216c1615 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -54,6 +54,25 @@ void __sched down_write(struct rw_semaphore *sem) EXPORT_SYMBOL(down_write); +/* + * lock for writing + */ +int __sched down_write_killable(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + + if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { + rwsem_release(&sem->dep_map, 1, _RET_IP_); + return -EINTR; + } + + rwsem_set_owner(sem); + return 0; +} + +EXPORT_SYMBOL(down_write_killable); + /* * trylock for writing -- returns 1 if successful, 0 if contention */ -- cgit v1.2.3 From be32bcbbd18213803ff24e480340d4d2cca1df4f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Apr 2016 14:02:36 +0200 Subject: soc: renesas: Move pm-rcar to drivers/soc/renesas/rcar-sysc Move the pm-rcar driver from arch/arm/mach-shmobile/ to drivers/soc/renesas/, and its header file to include/linux/soc/renesas/, so it can be shared between arm32 (R-Car H1 and Gen2) and arm64 (R-Car Gen3). Rename it to rcar-sysc as it's really a driver for the R-Car System Controller (SYSC). Kill the intermediate PM_RCAR config symbol, as it's not user configurable anymore, and to prepare for SoC-specific make rules. Add the missing #include to rcar-sysc.h, which was exposed by different include order. Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Simon Horman --- MAINTAINERS | 4 + arch/arm/mach-shmobile/Kconfig | 11 +-- arch/arm/mach-shmobile/Makefile | 1 - arch/arm/mach-shmobile/pm-r8a7779.c | 3 +- arch/arm/mach-shmobile/pm-rcar-gen2.c | 2 +- arch/arm/mach-shmobile/pm-rcar.c | 164 ---------------------------------- arch/arm/mach-shmobile/pm-rcar.h | 15 ---- arch/arm/mach-shmobile/smp-r8a7779.c | 2 +- arch/arm/mach-shmobile/smp-r8a7790.c | 2 +- drivers/soc/Makefile | 3 +- drivers/soc/renesas/Makefile | 5 ++ drivers/soc/renesas/rcar-sysc.c | 164 ++++++++++++++++++++++++++++++++++ include/linux/soc/renesas/rcar-sysc.h | 17 ++++ 13 files changed, 201 insertions(+), 192 deletions(-) delete mode 100644 arch/arm/mach-shmobile/pm-rcar.c delete mode 100644 arch/arm/mach-shmobile/pm-rcar.h create mode 100644 drivers/soc/renesas/Makefile create mode 100644 drivers/soc/renesas/rcar-sysc.c create mode 100644 include/linux/soc/renesas/rcar-sysc.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 03e00c7c88eb..a0e23922a90b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1491,6 +1491,8 @@ Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next S: Supported F: arch/arm64/boot/dts/renesas/ +F: drivers/soc/renesas/ +F: include/linux/soc/renesas/ ARM/RISCPC ARCHITECTURE M: Russell King @@ -1604,6 +1606,8 @@ F: arch/arm/configs/shmobile_defconfig F: arch/arm/include/debug/renesas-scif.S F: arch/arm/mach-shmobile/ F: drivers/sh/ +F: drivers/soc/renesas/ +F: include/linux/soc/renesas/ ARM/SOCFPGA ARCHITECTURE M: Dinh Nguyen diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index f2bc5c353119..fe4ccb52f921 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -4,11 +4,6 @@ config ARCH_SHMOBILE config ARCH_SHMOBILE_MULTI bool -config PM_RCAR - bool - select PM - select PM_GENERIC_DOMAINS - config PM_RMOBILE bool select PM @@ -16,13 +11,15 @@ config PM_RMOBILE config ARCH_RCAR_GEN1 bool - select PM_RCAR + select PM + select PM_GENERIC_DOMAINS select RENESAS_INTC_IRQPIN select SYS_SUPPORTS_SH_TMU config ARCH_RCAR_GEN2 bool - select PM_RCAR + select PM + select PM_GENERIC_DOMAINS select RENESAS_IRQC select SYS_SUPPORTS_SH_CMT select PCI_DOMAINS if PCI diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index a65c80ac9009..ebb909c55b85 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -39,7 +39,6 @@ smp-$(CONFIG_ARCH_EMEV2) += smp-emev2.o headsmp-scu.o platsmp-scu.o # PM objects obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o -obj-$(CONFIG_PM_RCAR) += pm-rcar.o obj-$(CONFIG_PM_RMOBILE) += pm-rmobile.o obj-$(CONFIG_ARCH_RCAR_GEN2) += pm-rcar-gen2.o diff --git a/arch/arm/mach-shmobile/pm-r8a7779.c b/arch/arm/mach-shmobile/pm-r8a7779.c index 14c42a1bdf1e..4174cbcbc467 100644 --- a/arch/arm/mach-shmobile/pm-r8a7779.c +++ b/arch/arm/mach-shmobile/pm-r8a7779.c @@ -9,9 +9,10 @@ * for more details. */ +#include + #include -#include "pm-rcar.h" #include "r8a7779.h" /* SYSC */ diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c index 6815781ad116..691ac166a277 100644 --- a/arch/arm/mach-shmobile/pm-rcar-gen2.c +++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c @@ -13,9 +13,9 @@ #include #include #include +#include #include #include "common.h" -#include "pm-rcar.h" #include "rcar-gen2.h" /* RST */ diff --git a/arch/arm/mach-shmobile/pm-rcar.c b/arch/arm/mach-shmobile/pm-rcar.c deleted file mode 100644 index 0af05d288b09..000000000000 --- a/arch/arm/mach-shmobile/pm-rcar.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * R-Car SYSC Power management support - * - * Copyright (C) 2014 Magnus Damm - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include -#include -#include -#include "pm-rcar.h" - -/* SYSC Common */ -#define SYSCSR 0x00 /* SYSC Status Register */ -#define SYSCISR 0x04 /* Interrupt Status Register */ -#define SYSCISCR 0x08 /* Interrupt Status Clear Register */ -#define SYSCIER 0x0c /* Interrupt Enable Register */ -#define SYSCIMR 0x10 /* Interrupt Mask Register */ - -/* SYSC Status Register */ -#define SYSCSR_PONENB 1 /* Ready for power resume requests */ -#define SYSCSR_POFFENB 0 /* Ready for power shutoff requests */ - -/* - * Power Control Register Offsets inside the register block for each domain - * Note: The "CR" registers for ARM cores exist on H1 only - * Use WFI to power off, CPG/APMU to resume ARM cores on R-Car Gen2 - */ -#define PWRSR_OFFS 0x00 /* Power Status Register */ -#define PWROFFCR_OFFS 0x04 /* Power Shutoff Control Register */ -#define PWROFFSR_OFFS 0x08 /* Power Shutoff Status Register */ -#define PWRONCR_OFFS 0x0c /* Power Resume Control Register */ -#define PWRONSR_OFFS 0x10 /* Power Resume Status Register */ -#define PWRER_OFFS 0x14 /* Power Shutoff/Resume Error */ - - -#define SYSCSR_RETRIES 100 -#define SYSCSR_DELAY_US 1 - -#define PWRER_RETRIES 100 -#define PWRER_DELAY_US 1 - -#define SYSCISR_RETRIES 1000 -#define SYSCISR_DELAY_US 1 - -static void __iomem *rcar_sysc_base; -static DEFINE_SPINLOCK(rcar_sysc_lock); /* SMP CPUs + I/O devices */ - -static int rcar_sysc_pwr_on_off(const struct rcar_sysc_ch *sysc_ch, bool on) -{ - unsigned int sr_bit, reg_offs; - int k; - - if (on) { - sr_bit = SYSCSR_PONENB; - reg_offs = PWRONCR_OFFS; - } else { - sr_bit = SYSCSR_POFFENB; - reg_offs = PWROFFCR_OFFS; - } - - /* Wait until SYSC is ready to accept a power request */ - for (k = 0; k < SYSCSR_RETRIES; k++) { - if (ioread32(rcar_sysc_base + SYSCSR) & BIT(sr_bit)) - break; - udelay(SYSCSR_DELAY_US); - } - - if (k == SYSCSR_RETRIES) - return -EAGAIN; - - /* Submit power shutoff or power resume request */ - iowrite32(BIT(sysc_ch->chan_bit), - rcar_sysc_base + sysc_ch->chan_offs + reg_offs); - - return 0; -} - -static int rcar_sysc_power(const struct rcar_sysc_ch *sysc_ch, bool on) -{ - unsigned int isr_mask = BIT(sysc_ch->isr_bit); - unsigned int chan_mask = BIT(sysc_ch->chan_bit); - unsigned int status; - unsigned long flags; - int ret = 0; - int k; - - spin_lock_irqsave(&rcar_sysc_lock, flags); - - iowrite32(isr_mask, rcar_sysc_base + SYSCISCR); - - /* Submit power shutoff or resume request until it was accepted */ - for (k = 0; k < PWRER_RETRIES; k++) { - ret = rcar_sysc_pwr_on_off(sysc_ch, on); - if (ret) - goto out; - - status = ioread32(rcar_sysc_base + - sysc_ch->chan_offs + PWRER_OFFS); - if (!(status & chan_mask)) - break; - - udelay(PWRER_DELAY_US); - } - - if (k == PWRER_RETRIES) { - ret = -EIO; - goto out; - } - - /* Wait until the power shutoff or resume request has completed * */ - for (k = 0; k < SYSCISR_RETRIES; k++) { - if (ioread32(rcar_sysc_base + SYSCISR) & isr_mask) - break; - udelay(SYSCISR_DELAY_US); - } - - if (k == SYSCISR_RETRIES) - ret = -EIO; - - iowrite32(isr_mask, rcar_sysc_base + SYSCISCR); - - out: - spin_unlock_irqrestore(&rcar_sysc_lock, flags); - - pr_debug("sysc power domain %d: %08x -> %d\n", - sysc_ch->isr_bit, ioread32(rcar_sysc_base + SYSCISR), ret); - return ret; -} - -int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch) -{ - return rcar_sysc_power(sysc_ch, false); -} - -int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch) -{ - return rcar_sysc_power(sysc_ch, true); -} - -bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch) -{ - unsigned int st; - - st = ioread32(rcar_sysc_base + sysc_ch->chan_offs + PWRSR_OFFS); - if (st & BIT(sysc_ch->chan_bit)) - return true; - - return false; -} - -void __iomem *rcar_sysc_init(phys_addr_t base) -{ - rcar_sysc_base = ioremap_nocache(base, PAGE_SIZE); - if (!rcar_sysc_base) - panic("unable to ioremap R-Car SYSC hardware block\n"); - - return rcar_sysc_base; -} diff --git a/arch/arm/mach-shmobile/pm-rcar.h b/arch/arm/mach-shmobile/pm-rcar.h deleted file mode 100644 index 1b901db4a24c..000000000000 --- a/arch/arm/mach-shmobile/pm-rcar.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef PM_RCAR_H -#define PM_RCAR_H - -struct rcar_sysc_ch { - u16 chan_offs; - u8 chan_bit; - u8 isr_bit; -}; - -int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch); -int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch); -bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch); -void __iomem *rcar_sysc_init(phys_addr_t base); - -#endif /* PM_RCAR_H */ diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c index f5c31fbc10b2..c6951ee24588 100644 --- a/arch/arm/mach-shmobile/smp-r8a7779.c +++ b/arch/arm/mach-shmobile/smp-r8a7779.c @@ -19,13 +19,13 @@ #include #include #include +#include #include #include #include #include "common.h" -#include "pm-rcar.h" #include "r8a7779.h" #define AVECR IOMEM(0xfe700040) diff --git a/arch/arm/mach-shmobile/smp-r8a7790.c b/arch/arm/mach-shmobile/smp-r8a7790.c index f6426c6fdefc..28f26d5362d8 100644 --- a/arch/arm/mach-shmobile/smp-r8a7790.c +++ b/arch/arm/mach-shmobile/smp-r8a7790.c @@ -17,12 +17,12 @@ #include #include #include +#include #include #include "common.h" #include "platsmp-apmu.h" -#include "pm-rcar.h" #include "rcar-gen2.h" #include "r8a7790.h" diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index 5ade71306ee1..380230f03874 100644 --- a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -9,7 +9,8 @@ obj-$(CONFIG_MACH_DOVE) += dove/ obj-y += fsl/ obj-$(CONFIG_ARCH_MEDIATEK) += mediatek/ obj-$(CONFIG_ARCH_QCOM) += qcom/ -obj-$(CONFIG_ARCH_ROCKCHIP) += rockchip/ +obj-$(CONFIG_ARCH_RENESAS) += renesas/ +obj-$(CONFIG_ARCH_ROCKCHIP) += rockchip/ obj-$(CONFIG_SOC_SAMSUNG) += samsung/ obj-$(CONFIG_ARCH_SUNXI) += sunxi/ obj-$(CONFIG_ARCH_TEGRA) += tegra/ diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile new file mode 100644 index 000000000000..2b64f6c94681 --- /dev/null +++ b/drivers/soc/renesas/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_ARCH_R8A7779) += rcar-sysc.o +obj-$(CONFIG_ARCH_R8A7790) += rcar-sysc.o +obj-$(CONFIG_ARCH_R8A7791) += rcar-sysc.o +obj-$(CONFIG_ARCH_R8A7793) += rcar-sysc.o +obj-$(CONFIG_ARCH_R8A7794) += rcar-sysc.o diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c new file mode 100644 index 000000000000..d59bcdf78f0b --- /dev/null +++ b/drivers/soc/renesas/rcar-sysc.c @@ -0,0 +1,164 @@ +/* + * R-Car SYSC Power management support + * + * Copyright (C) 2014 Magnus Damm + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include + +/* SYSC Common */ +#define SYSCSR 0x00 /* SYSC Status Register */ +#define SYSCISR 0x04 /* Interrupt Status Register */ +#define SYSCISCR 0x08 /* Interrupt Status Clear Register */ +#define SYSCIER 0x0c /* Interrupt Enable Register */ +#define SYSCIMR 0x10 /* Interrupt Mask Register */ + +/* SYSC Status Register */ +#define SYSCSR_PONENB 1 /* Ready for power resume requests */ +#define SYSCSR_POFFENB 0 /* Ready for power shutoff requests */ + +/* + * Power Control Register Offsets inside the register block for each domain + * Note: The "CR" registers for ARM cores exist on H1 only + * Use WFI to power off, CPG/APMU to resume ARM cores on R-Car Gen2 + */ +#define PWRSR_OFFS 0x00 /* Power Status Register */ +#define PWROFFCR_OFFS 0x04 /* Power Shutoff Control Register */ +#define PWROFFSR_OFFS 0x08 /* Power Shutoff Status Register */ +#define PWRONCR_OFFS 0x0c /* Power Resume Control Register */ +#define PWRONSR_OFFS 0x10 /* Power Resume Status Register */ +#define PWRER_OFFS 0x14 /* Power Shutoff/Resume Error */ + + +#define SYSCSR_RETRIES 100 +#define SYSCSR_DELAY_US 1 + +#define PWRER_RETRIES 100 +#define PWRER_DELAY_US 1 + +#define SYSCISR_RETRIES 1000 +#define SYSCISR_DELAY_US 1 + +static void __iomem *rcar_sysc_base; +static DEFINE_SPINLOCK(rcar_sysc_lock); /* SMP CPUs + I/O devices */ + +static int rcar_sysc_pwr_on_off(const struct rcar_sysc_ch *sysc_ch, bool on) +{ + unsigned int sr_bit, reg_offs; + int k; + + if (on) { + sr_bit = SYSCSR_PONENB; + reg_offs = PWRONCR_OFFS; + } else { + sr_bit = SYSCSR_POFFENB; + reg_offs = PWROFFCR_OFFS; + } + + /* Wait until SYSC is ready to accept a power request */ + for (k = 0; k < SYSCSR_RETRIES; k++) { + if (ioread32(rcar_sysc_base + SYSCSR) & BIT(sr_bit)) + break; + udelay(SYSCSR_DELAY_US); + } + + if (k == SYSCSR_RETRIES) + return -EAGAIN; + + /* Submit power shutoff or power resume request */ + iowrite32(BIT(sysc_ch->chan_bit), + rcar_sysc_base + sysc_ch->chan_offs + reg_offs); + + return 0; +} + +static int rcar_sysc_power(const struct rcar_sysc_ch *sysc_ch, bool on) +{ + unsigned int isr_mask = BIT(sysc_ch->isr_bit); + unsigned int chan_mask = BIT(sysc_ch->chan_bit); + unsigned int status; + unsigned long flags; + int ret = 0; + int k; + + spin_lock_irqsave(&rcar_sysc_lock, flags); + + iowrite32(isr_mask, rcar_sysc_base + SYSCISCR); + + /* Submit power shutoff or resume request until it was accepted */ + for (k = 0; k < PWRER_RETRIES; k++) { + ret = rcar_sysc_pwr_on_off(sysc_ch, on); + if (ret) + goto out; + + status = ioread32(rcar_sysc_base + + sysc_ch->chan_offs + PWRER_OFFS); + if (!(status & chan_mask)) + break; + + udelay(PWRER_DELAY_US); + } + + if (k == PWRER_RETRIES) { + ret = -EIO; + goto out; + } + + /* Wait until the power shutoff or resume request has completed * */ + for (k = 0; k < SYSCISR_RETRIES; k++) { + if (ioread32(rcar_sysc_base + SYSCISR) & isr_mask) + break; + udelay(SYSCISR_DELAY_US); + } + + if (k == SYSCISR_RETRIES) + ret = -EIO; + + iowrite32(isr_mask, rcar_sysc_base + SYSCISCR); + + out: + spin_unlock_irqrestore(&rcar_sysc_lock, flags); + + pr_debug("sysc power domain %d: %08x -> %d\n", + sysc_ch->isr_bit, ioread32(rcar_sysc_base + SYSCISR), ret); + return ret; +} + +int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch) +{ + return rcar_sysc_power(sysc_ch, false); +} + +int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch) +{ + return rcar_sysc_power(sysc_ch, true); +} + +bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch) +{ + unsigned int st; + + st = ioread32(rcar_sysc_base + sysc_ch->chan_offs + PWRSR_OFFS); + if (st & BIT(sysc_ch->chan_bit)) + return true; + + return false; +} + +void __iomem *rcar_sysc_init(phys_addr_t base) +{ + rcar_sysc_base = ioremap_nocache(base, PAGE_SIZE); + if (!rcar_sysc_base) + panic("unable to ioremap R-Car SYSC hardware block\n"); + + return rcar_sysc_base; +} diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h new file mode 100644 index 000000000000..96f30c288388 --- /dev/null +++ b/include/linux/soc/renesas/rcar-sysc.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_SOC_RENESAS_RCAR_SYSC_H__ +#define __LINUX_SOC_RENESAS_RCAR_SYSC_H__ + +#include + +struct rcar_sysc_ch { + u16 chan_offs; + u8 chan_bit; + u8 isr_bit; +}; + +int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch); +int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch); +bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch); +void __iomem *rcar_sysc_init(phys_addr_t base); + +#endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */ -- cgit v1.2.3 From 2f024cef5b064d5498fe466dfe9492d46b5b12a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Apr 2016 14:02:39 +0200 Subject: soc: renesas: rcar-sysc: Make rcar_sysc_power_is_off() static As of commit b12ff41658171f53 ("ARM: shmobile: r8a7779: Remove legacy PM Domain remainings"), rcar_sysc_power_is_off() is no longer used from SoC-specific code. Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/soc/renesas/rcar-sysc.c | 2 +- include/linux/soc/renesas/rcar-sysc.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c index 95f2b59cbd76..4e760d63f2ab 100644 --- a/drivers/soc/renesas/rcar-sysc.c +++ b/drivers/soc/renesas/rcar-sysc.c @@ -152,7 +152,7 @@ int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch) return rcar_sysc_power(sysc_ch, true); } -bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch) +static bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch) { unsigned int st; diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h index 96f30c288388..92fc613ab23d 100644 --- a/include/linux/soc/renesas/rcar-sysc.h +++ b/include/linux/soc/renesas/rcar-sysc.h @@ -11,7 +11,6 @@ struct rcar_sysc_ch { int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch); int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch); -bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch); void __iomem *rcar_sysc_init(phys_addr_t base); #endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */ -- cgit v1.2.3 From 80dfd83dfab6e49a31ab8fc484a801aef1c567bd Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 13 Apr 2016 17:04:41 -0700 Subject: x86, drivers/pnpbios: Replace paravirt_enabled() check with legacy device check Since we are removing paravirt_enabled() replace it with a logical equivalent. Even though PNPBIOS is x86 specific we add an arch-specific type call, which can be implemented by any architecture to show how other legacy attribute devices can later be also checked for with other ACPI legacy attribute flags. This implicates the first ACPI 5.2.9.3 IA-PC Boot Architecture ACPI_FADT_LEGACY_DEVICES flag device, and shows how to add more. The reason pnpbios gets a defined structure and as such uses a different approach than the RTC legacy quirk is that ACPI has a respective RTC flag, while pnpbios does not. We fold the pnpbios quirk under ACPI_FADT_LEGACY_DEVICES ACPI flag use case, and use a struct of possible devices to enable future extensions of this. As per 0-day, this bumps the vmlinux size using i386-tinyconfig as follows: TOTAL TEXT init.text x86_early_init_platform_quirks() +32 +28 +28 +28 That's 4 byte overhead total, the rest is cleared out on init as its all __init text. v2: split out subarch handlng on switch to make it easier later to add other subarchs. The 'fall-through' switch handling can be confusing and we'll remove it later when we add handling for X86_SUBARCH_CE4100. v3: document vmlinux size impact as per 0-day, and also explain why pnpbios is treated differently than the RTC legacy feature. Signed-off-by: Luis R. Rodriguez Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andrew.cooper3@citrix.com Cc: andriy.shevchenko@linux.intel.com Cc: bigeasy@linutronix.de Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: ffainelli@freebox.fr Cc: george.dunlap@citrix.com Cc: glin@suse.com Cc: jgross@suse.com Cc: jlee@suse.com Cc: josh@joshtriplett.org Cc: julien.grall@linaro.org Cc: konrad.wilk@oracle.com Cc: kozerkov@parallels.com Cc: lenb@kernel.org Cc: lguest@lists.ozlabs.org Cc: linux-acpi@vger.kernel.org Cc: lv.zheng@intel.com Cc: matt@codeblueprint.co.uk Cc: mbizon@freebox.fr Cc: rjw@rjwysocki.net Cc: robert.moore@intel.com Cc: rusty@rustcorp.com.au Cc: tiwai@suse.de Cc: toshi.kani@hp.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1460592286-300-12-git-send-email-mcgrof@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 26 ++++++++++++++++++++++++++ arch/x86/kernel/platform-quirks.c | 11 +++++++++++ drivers/pnp/pnpbios/core.c | 3 ++- include/linux/pnp.h | 2 ++ 4 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 89d9d57e145d..4dcdf74dfed8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -141,16 +141,42 @@ struct x86_cpuinit_ops { struct timespec; +/** + * struct x86_legacy_devices - legacy x86 devices + * + * @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform + * is known to never have a PNPBIOS. + * + * These are devices known to require LPC or ISA bus. The definition of legacy + * devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag + * ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on + * the LPC or ISA bus. User visible devices are devices that have end-user + * accessible connectors (for example, LPT parallel port). Legacy devices on + * the LPC bus consist for example of serial and parallel ports, PS/2 keyboard + * / mouse, and the floppy disk controller. A system that lacks all known + * legacy devices can assume all devices can be detected exclusively via + * standard device enumeration mechanisms including the ACPI namespace. + * + * A system which has does not have ACPI_FADT_LEGACY_DEVICES enabled must not + * have any of the legacy devices enumerated below present. + */ +struct x86_legacy_devices { + int pnpbios; +}; + /** * struct x86_legacy_features - legacy x86 features * * @rtc: this device has a CMOS real-time clock present * @ebda_search: it's safe to search for the EBDA signature in the hardware's * low RAM + * @devices: legacy x86 devices, refer to struct x86_legacy_devices + * documentation for further details. */ struct x86_legacy_features { int rtc; int ebda_search; + struct x86_legacy_devices devices; }; /** diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 01b159781d96..ab643825a7aa 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -8,6 +8,7 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.rtc = 1; x86_platform.legacy.ebda_search = 0; + x86_platform.legacy.devices.pnpbios = 1; switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_PC: @@ -15,6 +16,9 @@ void __init x86_early_init_platform_quirks(void) break; case X86_SUBARCH_XEN: case X86_SUBARCH_LGUEST: + x86_platform.legacy.devices.pnpbios = 0; + x86_platform.legacy.rtc = 0; + break; case X86_SUBARCH_INTEL_MID: x86_platform.legacy.rtc = 0; break; @@ -23,3 +27,10 @@ void __init x86_early_init_platform_quirks(void) if (x86_platform.set_legacy_features) x86_platform.set_legacy_features(); } + +#if defined(CONFIG_PNPBIOS) +bool __init arch_pnpbios_disabled(void) +{ + return x86_platform.legacy.devices.pnpbios == 0; +} +#endif diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index facd43b8516c..81603d99082b 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -521,10 +521,11 @@ static int __init pnpbios_init(void) int ret; if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table) || - paravirt_enabled()) { + arch_pnpbios_disabled()) { printk(KERN_INFO "PnPBIOS: Disabled\n"); return -ENODEV; } + #ifdef CONFIG_PNPACPI if (!acpi_disabled && !pnpacpi_disabled) { pnpbios_disabled = 1; diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 5df733b8f704..2588ca6a9028 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -337,9 +337,11 @@ extern struct mutex pnp_res_mutex; #ifdef CONFIG_PNPBIOS extern struct pnp_protocol pnpbios_protocol; +extern bool arch_pnpbios_disabled(void); #define pnp_device_is_pnpbios(dev) ((dev)->protocol == (&pnpbios_protocol)) #else #define pnp_device_is_pnpbios(dev) 0 +#define arch_pnpbios_disabled() false #endif #ifdef CONFIG_PNPACPI -- cgit v1.2.3 From 1dff8083a024650c75a9c961c38082473ceae8cf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 18:04:57 +0200 Subject: mm: replace open coded page to virt conversion with page_to_virt() The open coded conversion from struct page address to virtual address in lowmem_page_address() involves an intermediate conversion step to pfn number/physical address. Since the placement of the struct page array relative to the linear mapping may be completely independent from the placement of physical RAM (as is that case for arm64 after commit dfd55ad85e 'arm64: vmemmap: use virtual projection of linear region'), the conversion to physical address and back again should factor out of the equation, but unfortunately, the shifting and pointer arithmetic involved prevent this from happening, and the resulting calculation essentially subtracts the address of the start of physical memory and adds it back again, in a way that prevents the compiler from optimizing it away. Since the start of physical memory is not a build time constant on arm64, the resulting conversion involves an unnecessary memory access, which we would like to get rid of. So replace the open coded conversion with a call to page_to_virt(), and use the open coded conversion as its default definition, to be overriden by the architecture, if desired. The existing arch specific definitions of page_to_virt are all equivalent to this default definition, so by itself this patch is a no-op. Acked-by: Andrew Morton Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ffcff53e3b2b..1c1e921edf93 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -72,6 +72,10 @@ extern int mmap_rnd_compat_bits __read_mostly; #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #endif +#ifndef page_to_virt +#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) +#endif + /* * To prevent common memory management code establishing * a zero page mapping on a read fault. @@ -948,7 +952,7 @@ static inline struct mem_cgroup *page_memcg(struct page *page) static __always_inline void *lowmem_page_address(const struct page *page) { - return __va(PFN_PHYS(page_to_pfn(page))); + return page_to_virt(page); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) -- cgit v1.2.3 From a7ab72390b77062420fb50e4451f71c9321aae05 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Apr 2016 08:38:50 +0200 Subject: i2c: mux: add common data for every i2c-mux instance All i2c-muxes have a parent adapter and one or many child adapters. A mux also has some means of selection. Previously, this was stored per child adapter, but it is only needed to keep track of this per mux. Add an i2c mux core, that keeps track of this consistently. Also add some glue for users of the old interface, which will create one implicit mux core per child adapter. Signed-off-by: Peter Rosin Tested-by: Antti Palosaari Tested-by: Crestez Dan Leonard Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 175 ++++++++++++++++++++++++++++++++++++------------ include/linux/i2c-mux.h | 34 ++++++++++ 2 files changed, 168 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index d4022878b2f0..5ce1b0704cb5 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -28,33 +28,34 @@ #include /* multiplexer per channel data */ +struct i2c_mux_priv_old { + void *mux_priv; + int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id); + int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id); +}; + struct i2c_mux_priv { struct i2c_adapter adap; struct i2c_algorithm algo; - - struct i2c_adapter *parent; - struct device *mux_dev; - void *mux_priv; + struct i2c_mux_core *muxc; u32 chan_id; - - int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id); - int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id); }; static int i2c_mux_master_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) { struct i2c_mux_priv *priv = adap->algo_data; - struct i2c_adapter *parent = priv->parent; + struct i2c_mux_core *muxc = priv->muxc; + struct i2c_adapter *parent = muxc->parent; int ret; /* Switch to the right mux port and perform the transfer. */ - ret = priv->select(parent, priv->mux_priv, priv->chan_id); + ret = muxc->select(muxc, priv->chan_id); if (ret >= 0) ret = __i2c_transfer(parent, msgs, num); - if (priv->deselect) - priv->deselect(parent, priv->mux_priv, priv->chan_id); + if (muxc->deselect) + muxc->deselect(muxc, priv->chan_id); return ret; } @@ -65,17 +66,18 @@ static int i2c_mux_smbus_xfer(struct i2c_adapter *adap, int size, union i2c_smbus_data *data) { struct i2c_mux_priv *priv = adap->algo_data; - struct i2c_adapter *parent = priv->parent; + struct i2c_mux_core *muxc = priv->muxc; + struct i2c_adapter *parent = muxc->parent; int ret; /* Select the right mux port and perform the transfer. */ - ret = priv->select(parent, priv->mux_priv, priv->chan_id); + ret = muxc->select(muxc, priv->chan_id); if (ret >= 0) ret = parent->algo->smbus_xfer(parent, addr, flags, read_write, command, size, data); - if (priv->deselect) - priv->deselect(parent, priv->mux_priv, priv->chan_id); + if (muxc->deselect) + muxc->deselect(muxc, priv->chan_id); return ret; } @@ -84,7 +86,7 @@ static int i2c_mux_smbus_xfer(struct i2c_adapter *adap, static u32 i2c_mux_functionality(struct i2c_adapter *adap) { struct i2c_mux_priv *priv = adap->algo_data; - struct i2c_adapter *parent = priv->parent; + struct i2c_adapter *parent = priv->muxc->parent; return parent->algo->functionality(parent); } @@ -102,6 +104,20 @@ static unsigned int i2c_mux_parent_classes(struct i2c_adapter *parent) return class; } +static int i2c_mux_select(struct i2c_mux_core *muxc, u32 chan) +{ + struct i2c_mux_priv_old *priv = i2c_mux_priv(muxc); + + return priv->select(muxc->parent, priv->mux_priv, chan); +} + +static int i2c_mux_deselect(struct i2c_mux_core *muxc, u32 chan) +{ + struct i2c_mux_priv_old *priv = i2c_mux_priv(muxc); + + return priv->deselect(muxc->parent, priv->mux_priv, chan); +} + struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, struct device *mux_dev, void *mux_priv, u32 force_nr, u32 chan_id, @@ -111,21 +127,77 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, int (*deselect) (struct i2c_adapter *, void *, u32)) { + struct i2c_mux_core *muxc; + struct i2c_mux_priv_old *priv; + int ret; + + muxc = i2c_mux_alloc(parent, mux_dev, 1, sizeof(*priv), 0, + i2c_mux_select, i2c_mux_deselect); + if (!muxc) + return NULL; + + priv = i2c_mux_priv(muxc); + priv->select = select; + priv->deselect = deselect; + priv->mux_priv = mux_priv; + + ret = i2c_mux_add_adapter(muxc, force_nr, chan_id, class); + if (ret) { + devm_kfree(mux_dev, muxc); + return NULL; + } + + return muxc->adapter[0]; +} +EXPORT_SYMBOL_GPL(i2c_add_mux_adapter); + +struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, + struct device *dev, int max_adapters, + int sizeof_priv, u32 flags, + int (*select)(struct i2c_mux_core *, u32), + int (*deselect)(struct i2c_mux_core *, u32)) +{ + struct i2c_mux_core *muxc; + + muxc = devm_kzalloc(dev, sizeof(*muxc) + + max_adapters * sizeof(muxc->adapter[0]) + + sizeof_priv, GFP_KERNEL); + if (!muxc) + return NULL; + if (sizeof_priv) + muxc->priv = &muxc->adapter[max_adapters]; + + muxc->parent = parent; + muxc->dev = dev; + muxc->select = select; + muxc->deselect = deselect; + muxc->max_adapters = max_adapters; + + return muxc; +} +EXPORT_SYMBOL_GPL(i2c_mux_alloc); + +int i2c_mux_add_adapter(struct i2c_mux_core *muxc, + u32 force_nr, u32 chan_id, + unsigned int class) +{ + struct i2c_adapter *parent = muxc->parent; struct i2c_mux_priv *priv; char symlink_name[20]; int ret; - priv = kzalloc(sizeof(struct i2c_mux_priv), GFP_KERNEL); + if (muxc->num_adapters >= muxc->max_adapters) { + dev_err(muxc->dev, "No room for more i2c-mux adapters\n"); + return -EINVAL; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) - return NULL; + return -ENOMEM; /* Set up private adapter data */ - priv->parent = parent; - priv->mux_dev = mux_dev; - priv->mux_priv = mux_priv; + priv->muxc = muxc; priv->chan_id = chan_id; - priv->select = select; - priv->deselect = deselect; /* Need to do algo dynamically because we don't know ahead * of time what sort of physical adapter we'll be dealing with. @@ -159,11 +231,11 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, * Try to populate the mux adapter's of_node, expands to * nothing if !CONFIG_OF. */ - if (mux_dev->of_node) { + if (muxc->dev->of_node) { struct device_node *child; u32 reg; - for_each_child_of_node(mux_dev->of_node, child) { + for_each_child_of_node(muxc->dev->of_node, child) { ret = of_property_read_u32(child, "reg", ®); if (ret) continue; @@ -177,8 +249,9 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, /* * Associate the mux channel with an ACPI node. */ - if (has_acpi_companion(mux_dev)) - acpi_preset_companion(&priv->adap.dev, ACPI_COMPANION(mux_dev), + if (has_acpi_companion(muxc->dev)) + acpi_preset_companion(&priv->adap.dev, + ACPI_COMPANION(muxc->dev), chan_id); if (force_nr) { @@ -192,33 +265,53 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, "failed to add mux-adapter (error=%d)\n", ret); kfree(priv); - return NULL; + return ret; } - WARN(sysfs_create_link(&priv->adap.dev.kobj, &mux_dev->kobj, "mux_device"), - "can't create symlink to mux device\n"); + WARN(sysfs_create_link(&priv->adap.dev.kobj, &muxc->dev->kobj, + "mux_device"), + "can't create symlink to mux device\n"); snprintf(symlink_name, sizeof(symlink_name), "channel-%u", chan_id); - WARN(sysfs_create_link(&mux_dev->kobj, &priv->adap.dev.kobj, symlink_name), - "can't create symlink for channel %u\n", chan_id); + WARN(sysfs_create_link(&muxc->dev->kobj, &priv->adap.dev.kobj, + symlink_name), + "can't create symlink for channel %u\n", chan_id); dev_info(&parent->dev, "Added multiplexed i2c bus %d\n", i2c_adapter_id(&priv->adap)); - return &priv->adap; + muxc->adapter[muxc->num_adapters++] = &priv->adap; + return 0; } -EXPORT_SYMBOL_GPL(i2c_add_mux_adapter); +EXPORT_SYMBOL_GPL(i2c_mux_add_adapter); -void i2c_del_mux_adapter(struct i2c_adapter *adap) +void i2c_mux_del_adapters(struct i2c_mux_core *muxc) { - struct i2c_mux_priv *priv = adap->algo_data; char symlink_name[20]; - snprintf(symlink_name, sizeof(symlink_name), "channel-%u", priv->chan_id); - sysfs_remove_link(&priv->mux_dev->kobj, symlink_name); + while (muxc->num_adapters) { + struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; + struct i2c_mux_priv *priv = adap->algo_data; + + muxc->adapter[muxc->num_adapters] = NULL; + + snprintf(symlink_name, sizeof(symlink_name), + "channel-%u", priv->chan_id); + sysfs_remove_link(&muxc->dev->kobj, symlink_name); + + sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); + i2c_del_adapter(adap); + kfree(priv); + } +} +EXPORT_SYMBOL_GPL(i2c_mux_del_adapters); + +void i2c_del_mux_adapter(struct i2c_adapter *adap) +{ + struct i2c_mux_priv *priv = adap->algo_data; + struct i2c_mux_core *muxc = priv->muxc; - sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); - i2c_del_adapter(adap); - kfree(priv); + i2c_mux_del_adapters(muxc); + devm_kfree(muxc->dev, muxc); } EXPORT_SYMBOL_GPL(i2c_del_mux_adapter); diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index b5f9a007a3ab..71ac1b3f4f68 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -27,6 +27,31 @@ #ifdef __KERNEL__ +struct i2c_mux_core { + struct i2c_adapter *parent; + struct device *dev; + + void *priv; + + int (*select)(struct i2c_mux_core *, u32 chan_id); + int (*deselect)(struct i2c_mux_core *, u32 chan_id); + + int num_adapters; + int max_adapters; + struct i2c_adapter *adapter[0]; +}; + +struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, + struct device *dev, int max_adapters, + int sizeof_priv, u32 flags, + int (*select)(struct i2c_mux_core *, u32), + int (*deselect)(struct i2c_mux_core *, u32)); + +static inline void *i2c_mux_priv(struct i2c_mux_core *muxc) +{ + return muxc->priv; +} + /* * Called to create a i2c bus on a multiplexed bus segment. * The mux_dev and chan_id parameters are passed to the select @@ -41,8 +66,17 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, void *mux_dev, u32 chan_id), int (*deselect) (struct i2c_adapter *, void *mux_dev, u32 chan_id)); +/* + * Called to create an i2c bus on a multiplexed bus segment. + * The chan_id parameter is passed to the select and deselect + * callback functions to perform hardware-specific mux control. + */ +int i2c_mux_add_adapter(struct i2c_mux_core *muxc, + u32 force_nr, u32 chan_id, + unsigned int class); void i2c_del_mux_adapter(struct i2c_adapter *adap); +void i2c_mux_del_adapters(struct i2c_mux_core *muxc); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 23fe440c59b9f08afe108e7ec7b6714cb2a3b955 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 2 Mar 2016 15:14:22 +0100 Subject: i2c: mux: drop old unused i2c-mux api All i2c mux users are using an explicit i2c mux core, drop support for implicit i2c mux cores. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 63 ------------------------------------------------- include/linux/i2c-mux.h | 15 ------------ 2 files changed, 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 5ce1b0704cb5..25e9336b0e6e 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -28,12 +28,6 @@ #include /* multiplexer per channel data */ -struct i2c_mux_priv_old { - void *mux_priv; - int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id); - int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id); -}; - struct i2c_mux_priv { struct i2c_adapter adap; struct i2c_algorithm algo; @@ -104,53 +98,6 @@ static unsigned int i2c_mux_parent_classes(struct i2c_adapter *parent) return class; } -static int i2c_mux_select(struct i2c_mux_core *muxc, u32 chan) -{ - struct i2c_mux_priv_old *priv = i2c_mux_priv(muxc); - - return priv->select(muxc->parent, priv->mux_priv, chan); -} - -static int i2c_mux_deselect(struct i2c_mux_core *muxc, u32 chan) -{ - struct i2c_mux_priv_old *priv = i2c_mux_priv(muxc); - - return priv->deselect(muxc->parent, priv->mux_priv, chan); -} - -struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, - struct device *mux_dev, - void *mux_priv, u32 force_nr, u32 chan_id, - unsigned int class, - int (*select) (struct i2c_adapter *, - void *, u32), - int (*deselect) (struct i2c_adapter *, - void *, u32)) -{ - struct i2c_mux_core *muxc; - struct i2c_mux_priv_old *priv; - int ret; - - muxc = i2c_mux_alloc(parent, mux_dev, 1, sizeof(*priv), 0, - i2c_mux_select, i2c_mux_deselect); - if (!muxc) - return NULL; - - priv = i2c_mux_priv(muxc); - priv->select = select; - priv->deselect = deselect; - priv->mux_priv = mux_priv; - - ret = i2c_mux_add_adapter(muxc, force_nr, chan_id, class); - if (ret) { - devm_kfree(mux_dev, muxc); - return NULL; - } - - return muxc->adapter[0]; -} -EXPORT_SYMBOL_GPL(i2c_add_mux_adapter); - struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, struct device *dev, int max_adapters, int sizeof_priv, u32 flags, @@ -305,16 +252,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) } EXPORT_SYMBOL_GPL(i2c_mux_del_adapters); -void i2c_del_mux_adapter(struct i2c_adapter *adap) -{ - struct i2c_mux_priv *priv = adap->algo_data; - struct i2c_mux_core *muxc = priv->muxc; - - i2c_mux_del_adapters(muxc); - devm_kfree(muxc->dev, muxc); -} -EXPORT_SYMBOL_GPL(i2c_del_mux_adapter); - MODULE_AUTHOR("Rodolfo Giometti "); MODULE_DESCRIPTION("I2C driver for multiplexed I2C busses"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index 71ac1b3f4f68..2fa93fe1345e 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -52,20 +52,6 @@ static inline void *i2c_mux_priv(struct i2c_mux_core *muxc) return muxc->priv; } -/* - * Called to create a i2c bus on a multiplexed bus segment. - * The mux_dev and chan_id parameters are passed to the select - * and deselect callback functions to perform hardware-specific - * mux control. - */ -struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, - struct device *mux_dev, - void *mux_priv, u32 force_nr, u32 chan_id, - unsigned int class, - int (*select) (struct i2c_adapter *, - void *mux_dev, u32 chan_id), - int (*deselect) (struct i2c_adapter *, - void *mux_dev, u32 chan_id)); /* * Called to create an i2c bus on a multiplexed bus segment. * The chan_id parameter is passed to the select and deselect @@ -75,7 +61,6 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, u32 force_nr, u32 chan_id, unsigned int class); -void i2c_del_mux_adapter(struct i2c_adapter *adap); void i2c_mux_del_adapters(struct i2c_mux_core *muxc); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 02fad5e9b433da3829d39f0afb3c51b4b6409ed5 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 9 Mar 2016 18:16:54 -0600 Subject: clocksource: Add missing include of of.h. This header uses OF_DELCARE_1 which is defined in linux/of.h. This fixes getting unhelpful compiler error messages about missing ')' before a string constant. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: David Lechner Signed-off-by: John Stultz --- include/linux/clocksource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a307bf62974f..44a1aff22566 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 457db29bfcfd1d9cc717587c446a89d60499d4a9 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 8 Apr 2016 14:02:11 +0800 Subject: security: Introduce security_settime64() security_settime() uses a timespec, which is not year 2038 safe on 32bit systems. Thus this patch introduces the security_settime64() function with timespec64 type. We also convert the cap_settime() helper function to use the 64bit types. This patch then moves security_settime() to the header file as an inline helper function so that existing users can be iteratively converted. None of the existing hooks is using the timespec argument and therefor the patch is not making any functional changes. Cc: Serge Hallyn , Cc: James Morris , Cc: "Serge E. Hallyn" , Cc: Paul Moore Cc: Stephen Smalley Cc: Kees Cook Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Reviewed-by: James Morris Signed-off-by: Baolin Wang [jstultz: Reworded commit message] Signed-off-by: John Stultz --- include/linux/lsm_hooks.h | 5 +++-- include/linux/security.h | 20 +++++++++++++++++--- security/commoncap.c | 2 +- security/security.c | 2 +- 4 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cdee11cbcdf1..41ab4662f95c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1190,7 +1190,8 @@ * Return 0 if permission is granted. * @settime: * Check permission to change the system time. - * struct timespec and timezone are defined in include/linux/time.h + * struct timespec64 is defined in include/linux/time64.h and timezone + * is defined in include/linux/time.h * @ts contains new time * @tz contains new timezone * Return 0 if permission is granted. @@ -1327,7 +1328,7 @@ union security_list_options { int (*quotactl)(int cmds, int type, int id, struct super_block *sb); int (*quota_on)(struct dentry *dentry); int (*syslog)(int type); - int (*settime)(const struct timespec *ts, const struct timezone *tz); + int (*settime)(const struct timespec64 *ts, const struct timezone *tz); int (*vm_enough_memory)(struct mm_struct *mm, long pages); int (*bprm_set_creds)(struct linux_binprm *bprm); diff --git a/include/linux/security.h b/include/linux/security.h index 157f0cb1e4d2..35ac8d9d4739 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -71,7 +71,7 @@ struct timezone; /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit); -extern int cap_settime(const struct timespec *ts, const struct timezone *tz); +extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -208,7 +208,13 @@ int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); -int security_settime(const struct timespec *ts, const struct timezone *tz); +int security_settime64(const struct timespec64 *ts, const struct timezone *tz); +static inline int security_settime(const struct timespec *ts, const struct timezone *tz) +{ + struct timespec64 ts64 = timespec_to_timespec64(*ts); + + return security_settime64(&ts64, tz); +} int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); @@ -462,10 +468,18 @@ static inline int security_syslog(int type) return 0; } +static inline int security_settime64(const struct timespec64 *ts, + const struct timezone *tz) +{ + return cap_settime(ts, tz); +} + static inline int security_settime(const struct timespec *ts, const struct timezone *tz) { - return cap_settime(ts, tz); + struct timespec64 ts64 = timespec_to_timespec64(*ts); + + return cap_settime(&ts64, tz); } static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) diff --git a/security/commoncap.c b/security/commoncap.c index 48071ed7c445..2074bf6a2fe3 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -111,7 +111,7 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, * Determine whether the current process may set the system clock and timezone * information, returning 0 if permission granted, -ve if denied. */ -int cap_settime(const struct timespec *ts, const struct timezone *tz) +int cap_settime(const struct timespec64 *ts, const struct timezone *tz) { if (!capable(CAP_SYS_TIME)) return -EPERM; diff --git a/security/security.c b/security/security.c index 3644b0344d29..8c44a64f191d 100644 --- a/security/security.c +++ b/security/security.c @@ -208,7 +208,7 @@ int security_syslog(int type) return call_int_hook(syslog, 0, type); } -int security_settime(const struct timespec *ts, const struct timezone *tz) +int security_settime64(const struct timespec64 *ts, const struct timezone *tz) { return call_int_hook(settime, 0, ts, tz); } -- cgit v1.2.3 From 86d3473224b004f920c107206d181d37db735145 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 8 Apr 2016 14:02:12 +0800 Subject: time: Introduce do_sys_settimeofday64() The do_sys_settimeofday() function uses a timespec, which is not year 2038 safe on 32bit systems. Thus this patch introduces do_sys_settimeofday64(), which allows us to transition users of do_sys_settimeofday() to using 64bit time types. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Baolin Wang [jstultz: Include errno-base.h to avoid build issue on some arches] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 17 +++++++++++++++-- kernel/time/time.c | 8 ++++---- 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 96f37bee3bc1..37dbacf84849 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TIMEKEEPING_H #define _LINUX_TIMEKEEPING_H +#include + /* Included from linux/ktime.h */ void timekeeping_init(void); @@ -11,8 +13,19 @@ extern int timekeeping_suspended; */ extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday64(const struct timespec64 *ts); -extern int do_sys_settimeofday(const struct timespec *tv, - const struct timezone *tz); +extern int do_sys_settimeofday64(const struct timespec64 *tv, + const struct timezone *tz); +static inline int do_sys_settimeofday(const struct timespec *tv, + const struct timezone *tz) +{ + struct timespec64 ts64; + + if (!tv) + return -EINVAL; + + ts64 = timespec_to_timespec64(*tv); + return do_sys_settimeofday64(&ts64, tz); +} /* * Kernel time accessors diff --git a/kernel/time/time.c b/kernel/time/time.c index be115b020d27..a4064b612066 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -160,15 +160,15 @@ static inline void warp_clock(void) * various programs will get confused when the clock gets warped. */ -int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) +int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz) { static int firsttime = 1; int error = 0; - if (tv && !timespec_valid(tv)) + if (tv && !timespec64_valid(tv)) return -EINVAL; - error = security_settime(tv, tz); + error = security_settime64(tv, tz); if (error) return error; @@ -186,7 +186,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) } } if (tv) - return do_settimeofday(tv); + return do_settimeofday64(tv); return 0; } -- cgit v1.2.3 From 9d90725ddca347450c4ab177ad680ed76063afd4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 18 Mar 2016 11:27:36 -0700 Subject: libnvdimm, blk: move i/o infrastructure to nd_namespace_blk Consolidate the information for issuing i/o to a blk-namespace, and eliminate some pointer chasing. Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 137 ++++++++++++++++++++++++++------------------------- include/linux/nd.h | 2 + 2 files changed, 71 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 26d039879ba2..4c14ecdc792b 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -21,17 +21,19 @@ #include #include "nd.h" -struct nd_blk_device { - struct nd_namespace_blk *nsblk; - struct nd_blk_region *ndbr; - size_t disk_size; - u32 sector_size; - u32 internal_lbasize; -}; +static u32 nsblk_meta_size(struct nd_namespace_blk *nsblk) +{ + return nsblk->lbasize - ((nsblk->lbasize >= 4096) ? 4096 : 512); +} -static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) +static u32 nsblk_internal_lbasize(struct nd_namespace_blk *nsblk) { - return blk_dev->nsblk->lbasize - blk_dev->sector_size; + return roundup(nsblk->lbasize, INT_LBASIZE_ALIGNMENT); +} + +static u32 nsblk_sector_size(struct nd_namespace_blk *nsblk) +{ + return nsblk->lbasize - nsblk_meta_size(nsblk); } static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, @@ -55,20 +57,29 @@ static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, return SIZE_MAX; } +static struct nd_blk_region *to_ndbr(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region; + struct device *parent; + + parent = nsblk->common.dev.parent; + nd_region = container_of(parent, struct nd_region, dev); + return container_of(nd_region, struct nd_blk_region, nd_region); +} + #ifdef CONFIG_BLK_DEV_INTEGRITY -static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, - struct bio_integrity_payload *bip, u64 lba, - int rw) +static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk, + struct bio_integrity_payload *bip, u64 lba, int rw) { - unsigned int len = nd_blk_meta_size(blk_dev); + struct nd_blk_region *ndbr = to_ndbr(nsblk); + unsigned int len = nsblk_meta_size(nsblk); resource_size_t dev_offset, ns_offset; - struct nd_namespace_blk *nsblk; - struct nd_blk_region *ndbr; + u32 internal_lbasize, sector_size; int err = 0; - nsblk = blk_dev->nsblk; - ndbr = blk_dev->ndbr; - ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; + internal_lbasize = nsblk_internal_lbasize(nsblk); + sector_size = nsblk_sector_size(nsblk); + ns_offset = lba * internal_lbasize + sector_size; dev_offset = to_dev_offset(nsblk, ns_offset, len); if (dev_offset == SIZE_MAX) return -EIO; @@ -102,25 +113,26 @@ static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, } #else /* CONFIG_BLK_DEV_INTEGRITY */ -static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, - struct bio_integrity_payload *bip, u64 lba, - int rw) +static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk, + struct bio_integrity_payload *bip, u64 lba, int rw) { return 0; } #endif -static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, - struct bio_integrity_payload *bip, struct page *page, - unsigned int len, unsigned int off, int rw, - sector_t sector) +static int nsblk_do_bvec(struct nd_namespace_blk *nsblk, + struct bio_integrity_payload *bip, struct page *page, + unsigned int len, unsigned int off, int rw, sector_t sector) { - struct nd_blk_region *ndbr = blk_dev->ndbr; + struct nd_blk_region *ndbr = to_ndbr(nsblk); resource_size_t dev_offset, ns_offset; + u32 internal_lbasize, sector_size; int err = 0; void *iobuf; u64 lba; + internal_lbasize = nsblk_internal_lbasize(nsblk); + sector_size = nsblk_sector_size(nsblk); while (len) { unsigned int cur_len; @@ -130,11 +142,11 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, * Block Window setup/move steps. the do_io routine is capable * of handling len <= PAGE_SIZE. */ - cur_len = bip ? min(len, blk_dev->sector_size) : len; + cur_len = bip ? min(len, sector_size) : len; - lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); - ns_offset = lba * blk_dev->internal_lbasize; - dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); + lba = div_u64(sector << SECTOR_SHIFT, sector_size); + ns_offset = lba * internal_lbasize; + dev_offset = to_dev_offset(nsblk, ns_offset, cur_len); if (dev_offset == SIZE_MAX) return -EIO; @@ -145,13 +157,13 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, return err; if (bip) { - err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); + err = nd_blk_rw_integrity(nsblk, bip, lba, rw); if (err) return err; } len -= cur_len; off += cur_len; - sector += blk_dev->sector_size >> SECTOR_SHIFT; + sector += sector_size >> SECTOR_SHIFT; } return err; @@ -160,7 +172,7 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) { struct bio_integrity_payload *bip; - struct nd_blk_device *blk_dev; + struct nd_namespace_blk *nsblk; struct bvec_iter iter; unsigned long start; struct bio_vec bvec; @@ -179,17 +191,17 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) } bip = bio_integrity(bio); - blk_dev = q->queuedata; + nsblk = q->queuedata; rw = bio_data_dir(bio); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; BUG_ON(len > PAGE_SIZE); - err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, - bvec.bv_offset, rw, iter.bi_sector); + err = nsblk_do_bvec(nsblk, bip, bvec.bv_page, len, + bvec.bv_offset, rw, iter.bi_sector); if (err) { - dev_dbg(&blk_dev->nsblk->common.dev, + dev_dbg(&nsblk->common.dev, "io error in %s sector %lld, len %d,\n", (rw == READ) ? "READ" : "WRITE", (unsigned long long) iter.bi_sector, len); @@ -205,17 +217,16 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } -static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, +static int nsblk_rw_bytes(struct nd_namespace_common *ndns, resource_size_t offset, void *iobuf, size_t n, int rw) { - struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); - struct nd_namespace_blk *nsblk = blk_dev->nsblk; - struct nd_blk_region *ndbr = blk_dev->ndbr; + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev); + struct nd_blk_region *ndbr = to_ndbr(nsblk); resource_size_t dev_offset; dev_offset = to_dev_offset(nsblk, offset, n); - if (unlikely(offset + n > blk_dev->disk_size)) { + if (unlikely(offset + n > nsblk->size)) { dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); return -EFAULT; } @@ -242,16 +253,16 @@ static void nd_blk_release_disk(void *disk) put_disk(disk); } -static int nd_blk_attach_disk(struct device *dev, - struct nd_namespace_common *ndns, struct nd_blk_device *blk_dev) +static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) { + struct device *dev = &nsblk->common.dev; resource_size_t available_disk_size; struct request_queue *q; struct gendisk *disk; u64 internal_nlba; - internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); - available_disk_size = internal_nlba * blk_dev->sector_size; + internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk)); + available_disk_size = internal_nlba * nsblk_sector_size(nsblk); q = blk_alloc_queue(GFP_KERNEL); if (!q) @@ -264,9 +275,9 @@ static int nd_blk_attach_disk(struct device *dev, blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); - blk_queue_logical_block_size(q, blk_dev->sector_size); + blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - q->queuedata = blk_dev; + q->queuedata = nsblk; disk = alloc_disk(0); if (!disk) @@ -276,17 +287,17 @@ static int nd_blk_attach_disk(struct device *dev, return -ENOMEM; } - disk->driverfs_dev = &ndns->dev; + disk->driverfs_dev = dev; disk->first_minor = 0; disk->fops = &nd_blk_fops; disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; - nvdimm_namespace_disk_name(ndns, disk->disk_name); + nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); set_capacity(disk, 0); add_disk(disk); - if (nd_blk_meta_size(blk_dev)) { - int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); + if (nsblk_meta_size(nsblk)) { + int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk)); if (rc) return rc; @@ -301,33 +312,23 @@ static int nd_blk_probe(struct device *dev) { struct nd_namespace_common *ndns; struct nd_namespace_blk *nsblk; - struct nd_blk_device *blk_dev; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); - blk_dev = devm_kzalloc(dev, sizeof(*blk_dev), GFP_KERNEL); - if (!blk_dev) - return -ENOMEM; - nsblk = to_nd_namespace_blk(&ndns->dev); - blk_dev->disk_size = nvdimm_namespace_capacity(ndns); - blk_dev->ndbr = to_nd_blk_region(dev->parent); - blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); - blk_dev->internal_lbasize = roundup(nsblk->lbasize, - INT_LBASIZE_ALIGNMENT); - blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); - dev_set_drvdata(dev, blk_dev); - - ndns->rw_bytes = nd_blk_rw_bytes; + nsblk->size = nvdimm_namespace_capacity(ndns); + dev_set_drvdata(dev, nsblk); + + ndns->rw_bytes = nsblk_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(dev, ndns, blk_dev) == 0) { + else if (nd_btt_probe(dev, ndns, nsblk) == 0) { /* we'll come back as btt-blk */ return -ENXIO; } else - return nd_blk_attach_disk(dev, ndns, blk_dev); + return nsblk_attach_disk(nsblk); } static int nd_blk_remove(struct device *dev) diff --git a/include/linux/nd.h b/include/linux/nd.h index 5489ab756d1a..5ea4aec7fd63 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -82,6 +82,7 @@ struct nd_namespace_pmem { * @uuid: namespace name supplied in the dimm label * @id: ida allocated id * @lbasize: blk namespaces have a native sector size when btt not present + * @size: sum of all the resource ranges allocated to this namespace * @num_resources: number of dpa extents to claim * @res: discontiguous dpa extents for given dimm */ @@ -91,6 +92,7 @@ struct nd_namespace_blk { u8 *uuid; int id; unsigned long lbasize; + resource_size_t size; int num_resources; struct resource **res; }; -- cgit v1.2.3 From 200c79da824c978fcf6eec1dc9c0a1e521133267 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Mar 2016 00:22:16 -0700 Subject: libnvdimm, pmem, pfn: make pmem_rw_bytes generic and refactor pfn setup In preparation for providing an alternative (to block device) access mechanism to persistent memory, convert pmem_rw_bytes() to nsio_rw_bytes(). This allows ->rw_bytes() functionality without requiring a 'struct pmem_device' to be instantiated. In other words, when ->rw_bytes() is in use i/o is driven through 'struct nd_namespace_io', otherwise it is driven through 'struct pmem_device' and the block layer. This consolidates the disjoint calls to devm_exit_badblocks() and devm_memunmap() into a common devm_nsio_disable() and cleans up the init path to use a unified pmem_attach_disk() implementation. Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt_devs.c | 4 +- drivers/nvdimm/claim.c | 61 ++++++++++ drivers/nvdimm/nd.h | 40 +++++-- drivers/nvdimm/pfn_devs.c | 4 +- drivers/nvdimm/pmem.c | 236 ++++++++++++++------------------------ include/linux/nd.h | 9 +- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 27 +++-- 9 files changed, 211 insertions(+), 173 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 4c14ecdc792b..495e06d9f7e7 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -324,7 +324,7 @@ static int nd_blk_probe(struct device *dev) ndns->rw_bytes = nsblk_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(dev, ndns, nsblk) == 0) { + else if (nd_btt_probe(dev, ndns) == 0) { /* we'll come back as btt-blk */ return -ENXIO; } else diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 1886171af80e..816d0dae6398 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -273,8 +273,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, return 0; } -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct device *btt_dev; @@ -289,7 +288,6 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!btt_dev) return -ENOMEM; - dev_set_drvdata(btt_dev, drvdata); btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL); rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb); dev_dbg(dev, "%s: btt: %s\n", __func__, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index e8f03b0e95e4..6bbd0a36994a 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -12,6 +12,7 @@ */ #include #include +#include #include "nd-core.h" #include "pfn.h" #include "btt.h" @@ -199,3 +200,63 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) return sum; } EXPORT_SYMBOL(nd_sb_checksum); + +static int nsio_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + if (unlikely(offset + size > nsio->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) { + unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); + + if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align))) + return -EIO; + return memcpy_from_pmem(buf, nsio->addr + offset, size); + } else { + memcpy_to_pmem(nsio->addr + offset, buf, size); + wmb_pmem(); + } + + return 0; +} + +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + struct nd_namespace_common *ndns = &nsio->common; + + nsio->size = resource_size(res); + if (!devm_request_mem_region(dev, res->start, resource_size(res), + dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", res); + return -EBUSY; + } + + ndns->rw_bytes = nsio_rw_bytes; + if (devm_init_badblocks(dev, &nsio->bb)) + return -ENOMEM; + nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb, + &nsio->res); + + nsio->addr = devm_memremap(dev, res->start, resource_size(res), + ARCH_MEMREMAP_PMEM); + if (IS_ERR(nsio->addr)) + return PTR_ERR(nsio->addr); + return 0; +} +EXPORT_SYMBOL_GPL(devm_nsio_enable); + +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + + devm_memunmap(dev, nsio->addr); + devm_exit_badblocks(dev, &nsio->bb); + devm_release_mem_region(dev, res->start, resource_size(res)); +} +EXPORT_SYMBOL_GPL(devm_nsio_disable); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0fb14890ba26..10e23fe49012 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -13,6 +13,7 @@ #ifndef __ND_H__ #define __ND_H__ #include +#include #include #include #include @@ -197,13 +198,12 @@ struct nd_gen_sb { u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else static inline int nd_btt_probe(struct device *dev, - struct nd_namespace_common *ndns, void *drvdata) + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -221,14 +221,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); int nd_pfn_validate(struct nd_pfn *nd_pfn); #else -static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +static inline int nd_pfn_probe(struct device *dev, + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -272,6 +271,20 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); +#if IS_ENABLED(CONFIG_ND_CLAIM) +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); +#else +static inline int devm_nsio_enable(struct device *dev, + struct nd_namespace_io *nsio) +{ + return -ENXIO; +} +static inline void devm_nsio_disable(struct device *dev, + struct nd_namespace_io *nsio) +{ +} +#endif int nd_blk_region_init(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long *start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) @@ -285,6 +298,19 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) return true; } void nd_iostat_end(struct bio *bio, unsigned long start); +static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector, + unsigned int len) +{ + if (bb->count) { + sector_t first_bad; + int num_bad; + + return !!badblocks_check(bb, sector, len / 512, &first_bad, + &num_bad); + } + + return false; +} resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); const u8 *nd_dev_to_uuid(struct device *dev); bool pmem_should_map_pages(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 96aa5490c279..9df081ae96e3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -410,8 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) } EXPORT_SYMBOL(nd_pfn_validate); -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct nd_pfn *nd_pfn; @@ -427,7 +426,6 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return -ENOMEM; - dev_set_drvdata(pfn_dev, drvdata); pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 67d48e2e8ca2..b5f81b02205c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -49,19 +49,6 @@ struct pmem_device { struct badblocks bb; }; -static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) -{ - if (bb->count) { - sector_t first_bad; - int num_bad; - - return !!badblocks_check(bb, sector, len / 512, &first_bad, - &num_bad); - } - - return false; -} - static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { @@ -209,16 +196,40 @@ void pmem_release_disk(void *disk) put_disk(disk); } -static struct pmem_device *pmem_alloc(struct device *dev, - struct resource *res, int id) +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap); + +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns) { + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct vmem_altmap __altmap, *altmap = NULL; + struct resource *res = &nsio->res; + struct nd_pfn *nd_pfn = NULL; + int nid = dev_to_node(dev); + struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; + struct resource pfn_res; struct request_queue *q; + struct gendisk *disk; + void *addr; + + /* while nsio_rw_bytes is active, parse a pfn info block if present */ + if (is_nd_pfn(dev)) { + nd_pfn = to_nd_pfn(dev); + altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + } + + /* we're attaching a block device, disable raw namespace access */ + devm_nsio_disable(dev, nsio); pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); if (!arch_has_wmb_pmem()) @@ -227,22 +238,31 @@ static struct pmem_device *pmem_alloc(struct device *dev, if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(dev))) { dev_warn(dev, "could not reserve region %pR\n", res); - return ERR_PTR(-EBUSY); + return -EBUSY; } q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + pmem->pmem_queue = q; pmem->pfn_flags = PFN_DEV; - if (pmem_should_map_pages(dev)) { - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, + if (is_nd_pfn(dev)) { + addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, + altmap); + pfn_sb = nd_pfn->pfn_sb; + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); + pmem->pfn_flags |= PFN_MAP; + res = &pfn_res; /* for badblocks populate */ + res->start += pmem->data_offset; + } else if (pmem_should_map_pages(dev)) { + addr = devm_memremap_pages(dev, &nsio->res, &q->q_usage_counter, NULL); pmem->pfn_flags |= PFN_MAP; } else - pmem->virt_addr = (void __pmem *) devm_memremap(dev, - pmem->phys_addr, pmem->size, - ARCH_MEMREMAP_PMEM); + addr = devm_memremap(dev, pmem->phys_addr, + pmem->size, ARCH_MEMREMAP_PMEM); /* * At release time the queue must be dead before @@ -250,23 +270,12 @@ static struct pmem_device *pmem_alloc(struct device *dev, */ if (devm_add_action(dev, pmem_release_queue, q)) { blk_cleanup_queue(q); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - if (IS_ERR(pmem->virt_addr)) - return (void __force *) pmem->virt_addr; - - pmem->pmem_queue = q; - return pmem; -} - -static int pmem_attach_disk(struct device *dev, - struct nd_namespace_common *ndns, struct pmem_device *pmem) -{ - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - int nid = dev_to_node(dev); - struct resource bb_res; - struct gendisk *disk; + if (IS_ERR(addr)) + return PTR_ERR(addr); + pmem->virt_addr = (void __pmem *) addr; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); @@ -291,20 +300,9 @@ static int pmem_attach_disk(struct device *dev, set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) / 512); pmem->pmem_disk = disk; - devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - bb_res.start = nsio->res.start + pmem->data_offset; - bb_res.end = nsio->res.end; - if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - - bb_res.start += __le32_to_cpu(pfn_sb->start_pad); - bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc); - } - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, - &bb_res); + nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); disk->bb = &pmem->bb; add_disk(disk); revalidate_disk(disk); @@ -312,33 +310,8 @@ static int pmem_attach_disk(struct device *dev, return 0; } -static int pmem_rw_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size, int rw) -{ - struct pmem_device *pmem = dev_get_drvdata(ndns->claim); - - if (unlikely(offset + size > pmem->size)) { - dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); - return -EFAULT; - } - - if (rw == READ) { - unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); - - if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align))) - return -EIO; - return memcpy_from_pmem(buf, pmem->virt_addr + offset, size); - } else { - memcpy_to_pmem(pmem->virt_addr + offset, buf, size); - wmb_pmem(); - } - - return 0; -} - static int nd_pfn_init(struct nd_pfn *nd_pfn) { - struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; u32 start_pad = 0, end_trunc = 0; resource_size_t start, size; @@ -404,7 +377,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * ->direct_access() to those that are included in the memmap. */ start += start_pad; - npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; + size = resource_size(&nsio->res); + npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) - start; @@ -413,13 +387,13 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) else return -ENXIO; - if (offset + start_pad + end_trunc >= pmem->size) { + if (offset + start_pad + end_trunc >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; + npfns = (size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -456,17 +430,14 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) +static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct resource res; - struct request_queue *q; - struct pmem_device *pmem; - struct vmem_altmap *altmap; - struct device *dev = &nd_pfn->dev; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - struct nd_namespace_common *ndns = nd_pfn->ndns; + u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { @@ -474,112 +445,75 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) .reserve = init_altmap_reserve(base), }; - pmem = dev_get_drvdata(dev); - pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = start_pad + end_trunc; + memcpy(res, &nsio->res, sizeof(*res)); + res->start += start_pad; + res->end -= end_trunc; + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (pmem->data_offset < SZ_8K) - return -EINVAL; + if (offset < SZ_8K) + return ERR_PTR(-EINVAL); nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) - / PAGE_SIZE; + nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); - altmap = & __altmap; - altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); + memcpy(altmap, &__altmap, sizeof(*altmap)); + altmap->free = PHYS_PFN(offset - SZ_8K); altmap->alloc = 0; } else - return -ENXIO; + return ERR_PTR(-ENXIO); - /* establish pfn range for lookup, and switch to direct map */ - q = pmem->pmem_queue; - memcpy(&res, &nsio->res, sizeof(res)); - res.start += start_pad; - res.end -= end_trunc; - devm_remove_action(dev, pmem_release_queue, q); - devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, - &q->q_usage_counter, altmap); - pmem->pfn_flags |= PFN_MAP; - - /* - * At release time the queue must be dead before - * devm_memremap_pages is unwound - */ - if (devm_add_action(dev, pmem_release_queue, q)) { - blk_cleanup_queue(q); - return -ENOMEM; - } - if (IS_ERR(pmem->virt_addr)) - return PTR_ERR(pmem->virt_addr); - - /* attach pmem disk in "pfn-mode" */ - return pmem_attach_disk(dev, ndns, pmem); + return altmap; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +/* + * Determine the effective resource range and vmem_altmap from an nd_pfn + * instance. + */ +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); int rc; if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; + return ERR_PTR(-ENODEV); rc = nd_pfn_init(nd_pfn); if (rc) - return rc; + return ERR_PTR(rc); + /* we need a valid pfn_sb before we can init a vmem_altmap */ - return __nvdimm_namespace_attach_pfn(nd_pfn); + return __nvdimm_setup_pfn(nd_pfn, res, altmap); } static int nd_pmem_probe(struct device *dev) { - struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; - struct nd_namespace_io *nsio; - struct pmem_device *pmem; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); - nsio = to_nd_namespace_io(&ndns->dev); - pmem = pmem_alloc(dev, &nsio->res, nd_region->id); - if (IS_ERR(pmem)) - return PTR_ERR(pmem); - - dev_set_drvdata(dev, pmem); - ndns->rw_bytes = pmem_rw_bytes; - if (devm_init_badblocks(dev, &pmem->bb)) - return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res); + if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev))) + return -ENXIO; - if (is_nd_btt(dev)) { - /* btt allocates its own request_queue */ - devm_remove_action(dev, pmem_release_queue, pmem->pmem_queue); - blk_cleanup_queue(pmem->pmem_queue); + if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - } if (is_nd_pfn(dev)) - return nvdimm_namespace_attach_pfn(ndns); + return pmem_attach_disk(dev, ndns); - if (nd_btt_probe(dev, ndns, pmem) == 0 - || nd_pfn_probe(dev, ndns, pmem) == 0) { - /* - * We'll come back as either btt-pmem, or pfn-pmem, so - * drop the queue allocation for now. - */ + /* if we find a valid info-block we'll come back as that personality */ + if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) return -ENXIO; - } - return pmem_attach_disk(dev, ndns, pmem); + /* ...otherwise we're just a raw pmem device */ + return pmem_attach_disk(dev, ndns); } static int nd_pmem_remove(struct device *dev) diff --git a/include/linux/nd.h b/include/linux/nd.h index 5ea4aec7fd63..aee2761d294c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -15,6 +15,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -55,13 +56,19 @@ static inline struct nd_namespace_common *to_ndns(struct device *dev) } /** - * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * struct nd_namespace_io - device representation of a persistent memory range * @dev: namespace device created by the nd region driver * @res: struct resource conversion of a NFIT SPA table + * @size: cached resource_size(@res) for fast path size checks + * @addr: virtual address to access the namespace range + * @bb: badblocks list for the namespace range */ struct nd_namespace_io { struct nd_namespace_common common; struct resource res; + resource_size_t size; + void __pmem *addr; + struct badblocks bb; }; /** diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index a34bfd0c8928..d5bc8c080b44 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -7,6 +7,7 @@ ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap ldflags-y += --wrap=memunmap ldflags-y += --wrap=__devm_request_region +ldflags-y += --wrap=__devm_release_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region ldflags-y += --wrap=devm_memremap_pages diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 0c1a7e65bb81..c842095f2801 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -239,13 +239,11 @@ struct resource *__wrap___devm_request_region(struct device *dev, } EXPORT_SYMBOL(__wrap___devm_request_region); -void __wrap___release_region(struct resource *parent, resource_size_t start, - resource_size_t n) +static bool nfit_test_release_region(struct resource *parent, + resource_size_t start, resource_size_t n) { - struct nfit_test_resource *nfit_res; - if (parent == &iomem_resource) { - nfit_res = get_nfit_res(start); + struct nfit_test_resource *nfit_res = get_nfit_res(start); if (nfit_res) { struct resource *res = nfit_res->res + 1; @@ -254,11 +252,26 @@ void __wrap___release_region(struct resource *parent, resource_size_t start, __func__, start, n, res); else memset(res, 0, sizeof(*res)); - return; + return true; } } - __release_region(parent, start, n); + return false; +} + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __release_region(parent, start, n); } EXPORT_SYMBOL(__wrap___release_region); +void __wrap___devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __devm_release_region(dev, parent, start, n); +} +EXPORT_SYMBOL(__wrap___devm_release_region); + MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 9ecda41acb971ebd07c8fb35faf24005c0baea12 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 5 Apr 2016 14:11:18 +0000 Subject: perf/core: Add ::write_backward attribute to perf event This patch introduces 'write_backward' bit to perf_event_attr, which controls the direction of a ring buffer. After set, the corresponding ring buffer is written from end to beginning. This feature is design to support reading from overwritable ring buffer. Ring buffer can be created by mapping a perf event fd. Kernel puts event records into ring buffer, user tooling like perf fetch them from address returned by mmap(). To prevent racing between kernel and tooling, they communicate to each other through 'head' and 'tail' pointers. Kernel maintains 'head' pointer, points it to the next free area (tail of the last record). Tooling maintains 'tail' pointer, points it to the tail of last consumed record (record has already been fetched). Kernel determines the available space in a ring buffer using these two pointers to avoid overwrite unfetched records. By mapping without 'PROT_WRITE', an overwritable ring buffer is created. Different from normal ring buffer, tooling is unable to maintain 'tail' pointer because writing is forbidden. Therefore, for this type of ring buffers, kernel overwrite old records unconditionally, works like flight recorder. This feature would be useful if reading from overwritable ring buffer were as easy as reading from normal ring buffer. However, there's an obscure problem. The following figure demonstrates a full overwritable ring buffer. In this figure, the 'head' pointer points to the end of last record, and a long record 'E' is pending. For a normal ring buffer, a 'tail' pointer would have pointed to position (X), so kernel knows there's no more space in the ring buffer. However, for an overwritable ring buffer, kernel ignore the 'tail' pointer. (X) head . | . V +------+-------+----------+------+---+ |A....A|B.....B|C........C|D....D| | +------+-------+----------+------+---+ Record 'A' is overwritten by event 'E': head | V +--+---+-------+----------+------+---+ |.E|..A|B.....B|C........C|D....D|E..| +--+---+-------+----------+------+---+ Now tooling decides to read from this ring buffer. However, none of these two natural positions, 'head' and the start of this ring buffer, are pointing to the head of a record. Even the full ring buffer can be accessed by tooling, it is unable to find a position to start decoding. The first attempt tries to solve this problem AFAIK can be found from [1]. It makes kernel to maintain 'tail' pointer: updates it when ring buffer is half full. However, this approach introduces overhead to fast path. Test result shows a 1% overhead [2]. In addition, this method utilizes no more tham 50% records. Another attempt can be found from [3], which allows putting the size of an event at the end of each record. This approach allows tooling to find records in a backward manner from 'head' pointer by reading size of a record from its tail. However, because of alignment requirement, it needs 8 bytes to record the size of a record, which is a huge waste. Its performance is also not good, because more data need to be written. This approach also introduces some extra branch instructions to fast path. 'write_backward' is a better solution to this problem. Following figure demonstrates the state of the overwritable ring buffer when 'write_backward' is set before overwriting: head | V +---+------+----------+-------+------+ | |D....D|C........C|B.....B|A....A| +---+------+----------+-------+------+ and after overwriting: head | V +---+------+----------+-------+---+--+ |..E|D....D|C........C|B.....B|A..|E.| +---+------+----------+-------+---+--+ In each situation, 'head' points to the beginning of the newest record. From this record, tooling can iterate over the full ring buffer and fetch records one by one. The only limitation that needs to be considered is back-to-back reading. Due to the non-deterministic of user programs, it is impossible to ensure the ring buffer keeps stable during reading. Consider an extreme situation: tooling is scheduled out after reading record 'D', then a burst of events come, eat up the whole ring buffer (one or multiple rounds). When the tooling process comes back, reading after 'D' is incorrect now. To prevent this problem, we need to find a way to ensure the ring buffer is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is suggested because its overhead is lower than ioctl(PERF_EVENT_IOC_ENABLE). By carefully verifying 'header' pointer, reader can avoid pausing the ring-buffer. For example: /* A union of all possible events */ union perf_event event; p = head = perf_mmap__read_head(); while (true) { /* copy header of next event */ fetch(&event.header, p, sizeof(event.header)); /* read 'head' pointer */ head = perf_mmap__read_head(); /* check overwritten: is the header good? */ if (!verify(sizeof(event.header), p, head)) break; /* copy the whole event */ fetch(&event, p, event.header.size); /* read 'head' pointer again */ head = perf_mmap__read_head(); /* is the whole event good? */ if (!verify(event.header.size, p, head)) break; p += event.header.size; } However, the overhead is high because: a) In-place decoding is not safe. Copying-verifying-decoding is required. b) Fetching 'head' pointer requires additional synchronization. (From Alexei Starovoitov: Even when this trick works, pause is needed for more than stability of reading. When we collect the events into overwrite buffer we're waiting for some other trigger (like all cpu utilization spike or just one cpu running and all others are idle) and when it happens the buffer has valuable info from the past. At this point new events are no longer interesting and buffer should be paused, events read and unpaused until next trigger comes.) This patch utilizes event's default overflow_handler introduced previously. perf_event_output_backward() is created as the default overflow handler for backward ring buffers. To avoid extra overhead to fast path, original perf_event_output() becomes __perf_event_output() and marked '__always_inline'. In theory, there's no extra overhead introduced to fast path. Performance testing: Calling 3000000 times of 'close(-1)', use gettimeofday() to check duration. Use 'perf record -o /dev/null -e raw_syscalls:*' to capture system calls. In ns. Testing environment: CPU : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz Kernel : v4.5.0 MEAN STDVAR BASE 800214.950 2853.083 PRE1 2253846.700 9997.014 PRE2 2257495.540 8516.293 POST 2250896.100 8933.921 Where 'BASE' is pure performance without capturing. 'PRE1' is test result of pure 'v4.5.0' kernel. 'PRE2' is test result before this patch. 'POST' is test result after this patch. See [4] for the detailed experimental setup. Considering the stdvar, this patch doesn't introduce performance overhead to the fast path. [1] http://lkml.iu.edu/hypermail/linux/kernel/1304.1/04584.html [2] http://lkml.iu.edu/hypermail/linux/kernel/1307.1/00535.html [3] http://lkml.iu.edu/hypermail/linux/kernel/1512.0/01265.html [4] http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com Signed-off-by: Wang Nan Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Cc: Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Brendan Gregg Cc: He Kuang Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Zefan Li Link: http://lkml.kernel.org/r/1459865478-53413-1-git-send-email-wangnan0@huawei.com [ Fixed the changelog some more. ] Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 28 +++++++++++++++++++++--- include/uapi/linux/perf_event.h | 3 ++- kernel/events/core.c | 48 ++++++++++++++++++++++++++++++++++++----- kernel/events/ring_buffer.c | 16 +++++++++++++- 4 files changed, 85 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b8b195fbe787..85749ae8cb5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -834,14 +834,24 @@ extern int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +extern void perf_event_output_forward(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); +extern void perf_event_output_backward(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) { - return (event->overflow_handler == perf_event_output); + if (likely(event->overflow_handler == perf_event_output_forward)) + return true; + if (unlikely(event->overflow_handler == perf_event_output_backward)) + return true; + return false; } extern void @@ -1051,8 +1061,20 @@ static inline bool has_aux(struct perf_event *event) return event->pmu->setup_aux; } +static inline bool is_write_backward(struct perf_event *event) +{ + return !!event->attr.write_backward; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); +extern int perf_output_begin_forward(struct perf_output_handle *handle, + struct perf_event *event, + unsigned int size); +extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_event *event, + unsigned int size); + extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index a3c19034d5f8..43fc8d213472 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -340,7 +340,8 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - __reserved_1 : 37; + write_backward : 1, /* Write ring buffer from end to beginning */ + __reserved_1 : 36; union { __u32 wakeup_events; /* wakeup every n events */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 21ba024c9ed1..eabeb2aec00f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5694,9 +5694,13 @@ void perf_prepare_sample(struct perf_event_header *header, } } -void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) +static void __always_inline +__perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs, + int (*output_begin)(struct perf_output_handle *, + struct perf_event *, + unsigned int)) { struct perf_output_handle handle; struct perf_event_header header; @@ -5706,7 +5710,7 @@ void perf_event_output(struct perf_event *event, perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (output_begin(&handle, event, header.size)) goto exit; perf_output_sample(&handle, &header, data, event); @@ -5717,6 +5721,30 @@ exit: rcu_read_unlock(); } +void +perf_event_output_forward(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + __perf_event_output(event, data, regs, perf_output_begin_forward); +} + +void +perf_event_output_backward(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + __perf_event_output(event, data, regs, perf_output_begin_backward); +} + +void +perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + __perf_event_output(event, data, regs, perf_output_begin); +} + /* * read event_id */ @@ -8153,8 +8181,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (overflow_handler) { event->overflow_handler = overflow_handler; event->overflow_handler_context = context; + } else if (is_write_backward(event)){ + event->overflow_handler = perf_event_output_backward; + event->overflow_handler_context = NULL; } else { - event->overflow_handler = perf_event_output; + event->overflow_handler = perf_event_output_forward; event->overflow_handler_context = NULL; } @@ -8388,6 +8419,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) if (output_event->clock != event->clock) goto out; + /* + * Either writing ring buffer from beginning or from end. + * Mixing is not allowed. + */ + if (is_write_backward(output_event) != is_write_backward(event)) + goto out; + /* * If both events generate aux data, they must be on the same PMU */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 60be55a64040..c49bab42dc57 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -230,10 +230,24 @@ out: return -ENOSPC; } +int perf_output_begin_forward(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size) +{ + return __perf_output_begin(handle, event, size, false); +} + +int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size) +{ + return __perf_output_begin(handle, event, size, true); +} + int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, false); + + return __perf_output_begin(handle, event, size, + unlikely(is_write_backward(event))); } unsigned int perf_output_copy(struct perf_output_handle *handle, -- cgit v1.2.3 From cee1afce3053e7aa0793fbd5f2e845fa2cef9e33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 13 Apr 2016 15:56:50 +0200 Subject: sched/fair: Gather CPU load functions under a more conventional namespace The CPU load update related functions have a weak naming convention currently, starting with update_cpu_load_*() which isn't ideal as "update" is a very generic concept. Since two of these functions are public already (and a third is to come) that's enough to introduce a more conventional naming scheme. So let's do the following rename instead: update_cpu_load_*() -> cpu_load_update_*() Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Chris Metcalf Cc: Christoph Lameter Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460555812-25375-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- Documentation/trace/ftrace.txt | 10 +++++----- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 2 +- kernel/sched/fair.c | 24 ++++++++++++------------ kernel/sched/sched.h | 4 ++-- kernel/time/tick-sched.c | 2 +- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index f52f297cb406..9857606dd7b7 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1562,12 +1562,12 @@ Doing the same with chrt -r 5 and function-trace set. -0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit -0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit -0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit - -0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit - -0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz + -0 3dN.1 13us : cpu_load_update_nohz <-tick_nohz_idle_exit + -0 3dN.1 13us : _raw_spin_lock <-cpu_load_update_nohz -0 3dN.1 13us : add_preempt_count <-_raw_spin_lock - -0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz - -0 3dN.2 14us : sched_avg_update <-__update_cpu_load - -0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz + -0 3dN.2 13us : __cpu_load_update <-cpu_load_update_nohz + -0 3dN.2 14us : sched_avg_update <-__cpu_load_update + -0 3dN.2 14us : _raw_spin_unlock <-cpu_load_update_nohz -0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock -0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit -0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit diff --git a/include/linux/sched.h b/include/linux/sched.h index 13c1c1d07270..0b7f6028a50b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -178,9 +178,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void update_cpu_load_nohz(int active); +extern void cpu_load_update_nohz(int active); #else -static inline void update_cpu_load_nohz(int active) { } +static inline void cpu_load_update_nohz(int active) { } #endif extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 06efbb9c9544..c98a2688f390 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2917,7 +2917,7 @@ void scheduler_tick(void) raw_spin_lock(&rq->lock); update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); - update_cpu_load_active(rq); + cpu_load_update_active(rq); calc_global_load_tick(rq); raw_spin_unlock(&rq->lock); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c328bd77fe35..ecd81c4ebb56 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4559,7 +4559,7 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) } /** - * __update_cpu_load - update the rq->cpu_load[] statistics + * __cpu_load_update - update the rq->cpu_load[] statistics * @this_rq: The rq to update statistics for * @this_load: The current load * @pending_updates: The number of missed updates @@ -4594,7 +4594,7 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra * term. See the @active paramter. */ -static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, +static void __cpu_load_update(struct rq *this_rq, unsigned long this_load, unsigned long pending_updates, int active) { unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0; @@ -4642,7 +4642,7 @@ static unsigned long weighted_cpuload(const int cpu) } #ifdef CONFIG_NO_HZ_COMMON -static void __update_cpu_load_nohz(struct rq *this_rq, +static void __cpu_load_update_nohz(struct rq *this_rq, unsigned long curr_jiffies, unsigned long load, int active) @@ -4657,7 +4657,7 @@ static void __update_cpu_load_nohz(struct rq *this_rq, * In the NOHZ_FULL case, we were non-idle, we should consider * its weighted load. */ - __update_cpu_load(this_rq, load, pending_updates, active); + __cpu_load_update(this_rq, load, pending_updates, active); } } @@ -4678,7 +4678,7 @@ static void __update_cpu_load_nohz(struct rq *this_rq, * Called from nohz_idle_balance() to update the load ratings before doing the * idle balance. */ -static void update_cpu_load_idle(struct rq *this_rq) +static void cpu_load_update_idle(struct rq *this_rq) { /* * bail if there's load or we're actually up-to-date. @@ -4686,13 +4686,13 @@ static void update_cpu_load_idle(struct rq *this_rq) if (weighted_cpuload(cpu_of(this_rq))) return; - __update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0); + __cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0, 0); } /* * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. */ -void update_cpu_load_nohz(int active) +void cpu_load_update_nohz(int active) { struct rq *this_rq = this_rq(); unsigned long curr_jiffies = READ_ONCE(jiffies); @@ -4702,7 +4702,7 @@ void update_cpu_load_nohz(int active) return; raw_spin_lock(&this_rq->lock); - __update_cpu_load_nohz(this_rq, curr_jiffies, load, active); + __cpu_load_update_nohz(this_rq, curr_jiffies, load, active); raw_spin_unlock(&this_rq->lock); } #endif /* CONFIG_NO_HZ */ @@ -4710,14 +4710,14 @@ void update_cpu_load_nohz(int active) /* * Called from scheduler_tick() */ -void update_cpu_load_active(struct rq *this_rq) +void cpu_load_update_active(struct rq *this_rq) { unsigned long load = weighted_cpuload(cpu_of(this_rq)); /* - * See the mess around update_cpu_load_idle() / update_cpu_load_nohz(). + * See the mess around cpu_load_update_idle() / cpu_load_update_nohz(). */ this_rq->last_load_update_tick = jiffies; - __update_cpu_load(this_rq, load, 1, 1); + __cpu_load_update(this_rq, load, 1, 1); } /* @@ -8031,7 +8031,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) if (time_after_eq(jiffies, rq->next_balance)) { raw_spin_lock_irq(&rq->lock); update_rq_clock(rq); - update_cpu_load_idle(rq); + cpu_load_update_idle(rq); raw_spin_unlock_irq(&rq->lock); rebalance_domains(rq, CPU_IDLE); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a7cbad7b3ad2..32d9e22cfacf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -31,9 +31,9 @@ extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq); #ifdef CONFIG_SMP -extern void update_cpu_load_active(struct rq *this_rq); +extern void cpu_load_update_active(struct rq *this_rq); #else -static inline void update_cpu_load_active(struct rq *this_rq) { } +static inline void cpu_load_update_active(struct rq *this_rq) { } #endif /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 58e3310c9b21..66bdc9acc283 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -806,7 +806,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int { /* Update jiffies first */ tick_do_update_jiffies64(now); - update_cpu_load_nohz(active); + cpu_load_update_nohz(active); calc_load_exit_idle(); touch_softlockup_watchdog_sched(); -- cgit v1.2.3 From 1f41906a6fda1114debd3898668bd7ab6470ee41 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 13 Apr 2016 15:56:51 +0200 Subject: sched/fair: Correctly handle nohz ticks CPU load accounting Ticks can happen while the CPU is in dynticks-idle or dynticks-singletask mode. In fact "nohz" or "dynticks" only mean that we exit the periodic mode and we try to minimize the ticks as much as possible. The nohz subsystem uses a confusing terminology with the internal state "ts->tick_stopped" which is also available through its public interface with tick_nohz_tick_stopped(). This is a misnomer as the tick is instead reduced with the best effort rather than stopped. In the best case the tick can indeed be actually stopped but there is no guarantee about that. If a timer needs to fire one second later, a tick will fire while the CPU is in nohz mode and this is a very common scenario. Now this confusion happens to be a problem with CPU load updates: cpu_load_update_active() doesn't handle nohz ticks correctly because it assumes that ticks are completely stopped in nohz mode and that cpu_load_update_active() can't be called in dynticks mode. When that happens, the whole previous tickless load is ignored and the function just records the load for the current tick, ignoring potentially long idle periods behind. In order to solve this, we could account the current load for the previous nohz time but there is a risk that we account the load of a task that got freshly enqueued for the whole nohz period. So instead, lets record the dynticks load on nohz frame entry so we know what to record in case of nohz ticks, then use this record to account the tickless load on nohz ticks and nohz frame end. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Chris Metcalf Cc: Christoph Lameter Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460555812-25375-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++- kernel/sched/fair.c | 97 +++++++++++++++++++++++++++++++----------------- kernel/time/tick-sched.c | 9 +++-- 3 files changed, 72 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0b7f6028a50b..d894f2d61388 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -178,9 +178,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz(int active); +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); #else -static inline void cpu_load_update_nohz(int active) { } +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } #endif extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ecd81c4ebb56..b70367a3e1ef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4563,7 +4563,6 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) * @this_rq: The rq to update statistics for * @this_load: The current load * @pending_updates: The number of missed updates - * @active: !0 for NOHZ_FULL * * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). @@ -4592,12 +4591,12 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) * load[i]_n = (1 - 1/2^i)^n * load[i]_0 * * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra - * term. See the @active paramter. + * term. */ -static void __cpu_load_update(struct rq *this_rq, unsigned long this_load, - unsigned long pending_updates, int active) +static void cpu_load_update(struct rq *this_rq, unsigned long this_load, + unsigned long pending_updates) { - unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0; + unsigned long tickless_load = this_rq->cpu_load[0]; int i, scale; this_rq->nr_load_updates++; @@ -4642,10 +4641,23 @@ static unsigned long weighted_cpuload(const int cpu) } #ifdef CONFIG_NO_HZ_COMMON -static void __cpu_load_update_nohz(struct rq *this_rq, - unsigned long curr_jiffies, - unsigned long load, - int active) +/* + * There is no sane way to deal with nohz on smp when using jiffies because the + * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. + * + * Therefore we need to avoid the delta approach from the regular tick when + * possible since that would seriously skew the load calculation. This is why we + * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on + * jiffies deltas for updates happening while in nohz mode (idle ticks, idle + * loop exit, nohz_idle_balance, nohz full exit...) + * + * This means we might still be one tick off for nohz periods. + */ + +static void cpu_load_update_nohz(struct rq *this_rq, + unsigned long curr_jiffies, + unsigned long load) { unsigned long pending_updates; @@ -4657,23 +4669,10 @@ static void __cpu_load_update_nohz(struct rq *this_rq, * In the NOHZ_FULL case, we were non-idle, we should consider * its weighted load. */ - __cpu_load_update(this_rq, load, pending_updates, active); + cpu_load_update(this_rq, load, pending_updates); } } -/* - * There is no sane way to deal with nohz on smp when using jiffies because the - * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading - * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. - * - * Therefore we cannot use the delta approach from the regular tick since that - * would seriously skew the load calculation. However we'll make do for those - * updates happening while idle (nohz_idle_balance) or coming out of idle - * (tick_nohz_idle_exit). - * - * This means we might still be one tick off for nohz periods. - */ - /* * Called from nohz_idle_balance() to update the load ratings before doing the * idle balance. @@ -4686,26 +4685,56 @@ static void cpu_load_update_idle(struct rq *this_rq) if (weighted_cpuload(cpu_of(this_rq))) return; - __cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0, 0); + cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0); } /* - * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. + * Record CPU load on nohz entry so we know the tickless load to account + * on nohz exit. cpu_load[0] happens then to be updated more frequently + * than other cpu_load[idx] but it should be fine as cpu_load readers + * shouldn't rely into synchronized cpu_load[*] updates. */ -void cpu_load_update_nohz(int active) +void cpu_load_update_nohz_start(void) { struct rq *this_rq = this_rq(); + + /* + * This is all lockless but should be fine. If weighted_cpuload changes + * concurrently we'll exit nohz. And cpu_load write can race with + * cpu_load_update_idle() but both updater would be writing the same. + */ + this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq)); +} + +/* + * Account the tickless load in the end of a nohz frame. + */ +void cpu_load_update_nohz_stop(void) +{ unsigned long curr_jiffies = READ_ONCE(jiffies); - unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0; + struct rq *this_rq = this_rq(); + unsigned long load; if (curr_jiffies == this_rq->last_load_update_tick) return; + load = weighted_cpuload(cpu_of(this_rq)); raw_spin_lock(&this_rq->lock); - __cpu_load_update_nohz(this_rq, curr_jiffies, load, active); + cpu_load_update_nohz(this_rq, curr_jiffies, load); raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ +static inline void cpu_load_update_nohz(struct rq *this_rq, + unsigned long curr_jiffies, + unsigned long load) { } +#endif /* CONFIG_NO_HZ_COMMON */ + +static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load) +{ + /* See the mess around cpu_load_update_nohz(). */ + this_rq->last_load_update_tick = READ_ONCE(jiffies); + cpu_load_update(this_rq, load, 1); +} /* * Called from scheduler_tick() @@ -4713,11 +4742,11 @@ void cpu_load_update_nohz(int active) void cpu_load_update_active(struct rq *this_rq) { unsigned long load = weighted_cpuload(cpu_of(this_rq)); - /* - * See the mess around cpu_load_update_idle() / cpu_load_update_nohz(). - */ - this_rq->last_load_update_tick = jiffies; - __cpu_load_update(this_rq, load, 1, 1); + + if (tick_nohz_tick_stopped()) + cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load); + else + cpu_load_update_periodic(this_rq, load); } /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 66bdc9acc283..31872bc53bc4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -776,6 +776,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, if (!ts->tick_stopped) { nohz_balance_enter_idle(cpu); calc_load_enter_idle(); + cpu_load_update_nohz_start(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -802,11 +803,11 @@ out: return tick; } -static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active) +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ tick_do_update_jiffies64(now); - cpu_load_update_nohz(active); + cpu_load_update_nohz_stop(); calc_load_exit_idle(); touch_softlockup_watchdog_sched(); @@ -833,7 +834,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) if (can_stop_full_tick(ts)) tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); else if (ts->tick_stopped) - tick_nohz_restart_sched_tick(ts, ktime_get(), 1); + tick_nohz_restart_sched_tick(ts, ktime_get()); #endif } @@ -1024,7 +1025,7 @@ void tick_nohz_idle_exit(void) tick_nohz_stop_idle(ts, now); if (ts->tick_stopped) { - tick_nohz_restart_sched_tick(ts, now, 0); + tick_nohz_restart_sched_tick(ts, now); tick_nohz_account_idle_ticks(ts); } -- cgit v1.2.3 From dfc57732ad38f93ae6232a3b4e64fd077383a0f1 Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 20 Apr 2016 19:23:43 +0200 Subject: iio:core: mounting matrix support Expose a rotation matrix to indicate userspace the chip placement with respect to the overall hardware system. This is needed to adjust coordinates sampled from a sensor chip when its position deviates from the main hardware system. Final coordinates computation is delegated to userspace since: * computation may involve floating point arithmetics ; * it allows an application to combine adjustments with arbitrary transformations. This 3 dimentional space rotation matrix is expressed as 3x3 array of strings to support floating point numbers. It may be retrieved from a "[_][_]mount_matrix" sysfs attribute file. It is declared into a device / driver specific DTS property or platform data. Signed-off-by: Gregor Boirie Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 51 ++++++++++++++++++++ drivers/iio/industrialio-core.c | 82 +++++++++++++++++++++++++++++++++ include/linux/iio/iio.h | 31 +++++++++++++ 3 files changed, 164 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index f155eff910f9..ba8df69d40b0 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1512,3 +1512,54 @@ Contact: linux-iio@vger.kernel.org Description: Raw (unscaled no offset etc.) pH reading of a substance as a negative base-10 logarithm of hydrodium ions in a litre of water. + +What: /sys/bus/iio/devices/iio:deviceX/mount_matrix +What: /sys/bus/iio/devices/iio:deviceX/in_mount_matrix +What: /sys/bus/iio/devices/iio:deviceX/out_mount_matrix +KernelVersion: 4.6 +Contact: linux-iio@vger.kernel.org +Description: + Mounting matrix for IIO sensors. This is a rotation matrix which + informs userspace about sensor chip's placement relative to the + main hardware it is mounted on. + Main hardware placement is defined according to the local + reference frame related to the physical quantity the sensor + measures. + Given that the rotation matrix is defined in a board specific + way (platform data and / or device-tree), the main hardware + reference frame definition is left to the implementor's choice + (see below for a magnetometer example). + Applications should apply this rotation matrix to samples so + that when main hardware reference frame is aligned onto local + reference frame, then sensor chip reference frame is also + perfectly aligned with it. + Matrix is a 3x3 unitary matrix and typically looks like + [0, 1, 0; 1, 0, 0; 0, 0, -1]. Identity matrix + [1, 0, 0; 0, 1, 0; 0, 0, 1] means sensor chip and main hardware + are perfectly aligned with each other. + + For example, a mounting matrix for a magnetometer sensor informs + userspace about sensor chip's ORIENTATION relative to the main + hardware. + More specifically, main hardware orientation is defined with + respect to the LOCAL EARTH GEOMAGNETIC REFERENCE FRAME where : + * Y is in the ground plane and positive towards magnetic North ; + * X is in the ground plane, perpendicular to the North axis and + positive towards the East ; + * Z is perpendicular to the ground plane and positive upwards. + + An implementor might consider that for a hand-held device, a + 'natural' orientation would be 'front facing camera at the top'. + The main hardware reference frame could then be described as : + * Y is in the plane of the screen and is positive towards the + top of the screen ; + * X is in the plane of the screen, perpendicular to Y axis, and + positive towards the right hand side of the screen ; + * Z is perpendicular to the screen plane and positive out of the + screen. + Another example for a quadrotor UAV might be : + * Y is in the plane of the propellers and positive towards the + front-view camera; + * X is in the plane of the propellers, perpendicular to Y axis, + and positive towards the starboard side of the UAV ; + * Z is perpendicular to propellers plane and positive upwards. diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 190a5939fd8c..e6319a9346b2 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -412,6 +412,88 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev, } EXPORT_SYMBOL_GPL(iio_enum_write); +static const struct iio_mount_matrix iio_mount_idmatrix = { + .rotation = { + "1", "0", "0", + "0", "1", "0", + "0", "0", "1" + } +}; + +static int iio_setup_mount_idmatrix(const struct device *dev, + struct iio_mount_matrix *matrix) +{ + *matrix = iio_mount_idmatrix; + dev_info(dev, "mounting matrix not found: using identity...\n"); + return 0; +} + +ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv, + const struct iio_chan_spec *chan, char *buf) +{ + const struct iio_mount_matrix *mtx = ((iio_get_mount_matrix_t *) + priv)(indio_dev, chan); + + if (IS_ERR(mtx)) + return PTR_ERR(mtx); + + if (!mtx) + mtx = &iio_mount_idmatrix; + + return snprintf(buf, PAGE_SIZE, "%s, %s, %s; %s, %s, %s; %s, %s, %s\n", + mtx->rotation[0], mtx->rotation[1], mtx->rotation[2], + mtx->rotation[3], mtx->rotation[4], mtx->rotation[5], + mtx->rotation[6], mtx->rotation[7], mtx->rotation[8]); +} +EXPORT_SYMBOL_GPL(iio_show_mount_matrix); + +/** + * of_iio_read_mount_matrix() - retrieve iio device mounting matrix from + * device-tree "mount-matrix" property + * @dev: device the mounting matrix property is assigned to + * @propname: device specific mounting matrix property name + * @matrix: where to store retrieved matrix + * + * If device is assigned no mounting matrix property, a default 3x3 identity + * matrix will be filled in. + * + * Return: 0 if success, or a negative error code on failure. + */ +#ifdef CONFIG_OF +int of_iio_read_mount_matrix(const struct device *dev, + const char *propname, + struct iio_mount_matrix *matrix) +{ + if (dev->of_node) { + int err = of_property_read_string_array(dev->of_node, + propname, matrix->rotation, + ARRAY_SIZE(iio_mount_idmatrix.rotation)); + + if (err == ARRAY_SIZE(iio_mount_idmatrix.rotation)) + return 0; + + if (err >= 0) + /* Invalid number of matrix entries. */ + return -EINVAL; + + if (err != -EINVAL) + /* Invalid matrix declaration format. */ + return err; + } + + /* Matrix was not declared at all: fallback to identity. */ + return iio_setup_mount_idmatrix(dev, matrix); +} +#else +int of_iio_read_mount_matrix(const struct device *dev, + const char *propname, + struct iio_mount_matrix *matrix) +{ + return iio_setup_mount_idmatrix(dev, matrix); +} +#endif +EXPORT_SYMBOL(of_iio_read_mount_matrix); + /** * iio_format_value() - Formats a IIO value into its string representation * @buf: The buffer to which the formatted value gets written diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 0b2773ada0ba..7c29cb0124ae 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -147,6 +147,37 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev, .private = (uintptr_t)(_e), \ } +/** + * struct iio_mount_matrix - iio mounting matrix + * @rotation: 3 dimensional space rotation matrix defining sensor alignment with + * main hardware + */ +struct iio_mount_matrix { + const char *rotation[9]; +}; + +ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv, + const struct iio_chan_spec *chan, char *buf); +int of_iio_read_mount_matrix(const struct device *dev, const char *propname, + struct iio_mount_matrix *matrix); + +typedef const struct iio_mount_matrix * + (iio_get_mount_matrix_t)(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan); + +/** + * IIO_MOUNT_MATRIX() - Initialize mount matrix extended channel attribute + * @_shared: Whether the attribute is shared between all channels + * @_get: Pointer to an iio_get_mount_matrix_t accessor + */ +#define IIO_MOUNT_MATRIX(_shared, _get) \ +{ \ + .name = "mount_matrix", \ + .shared = (_shared), \ + .read = iio_show_mount_matrix, \ + .private = (uintptr_t)(_get), \ +} + /** * struct iio_event_spec - specification for a channel event * @type: Type of the event -- cgit v1.2.3 From 97eacb9166f4810368e180073dcbceeff0de34df Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 20 Apr 2016 19:23:44 +0200 Subject: iio:ak8975: add mounting matrix support Expose a rotation matrix to indicate userspace the chip orientation with respect to the overall hardware system. Matrix is retrieved from "in_mount_matrix". It is declared into ak8975 DTS entry as a "mount-matrix" property. Signed-off-by: Gregor Boirie Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../bindings/iio/magnetometer/ak8975.txt | 10 +++++++ drivers/iio/magnetometer/ak8975.c | 34 +++++++++++++++++++--- include/linux/iio/magnetometer/ak8975.h | 16 ++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 include/linux/iio/magnetometer/ak8975.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt b/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt index 34a3206eefdf..e1e7dd3259f6 100644 --- a/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt +++ b/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt @@ -9,6 +9,7 @@ Optional properties: - gpios : should be device tree identifier of the magnetometer DRDY pin - vdd-supply: an optional regulator that needs to be on to provide VDD + - mount-matrix: an optional 3x3 mounting rotation matrix Example: @@ -17,4 +18,13 @@ ak8975@0c { reg = <0x0c>; gpios = <&gpj0 7 0>; vdd-supply = <&ldo_3v3_gnss>; + mount-matrix = "-0.984807753012208", /* x0 */ + "0", /* y0 */ + "-0.173648177666930", /* z0 */ + "0", /* x1 */ + "-1", /* y1 */ + "0", /* z1 */ + "-0.173648177666930", /* x2 */ + "0", /* y2 */ + "0.984807753012208"; /* z2 */ }; diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index a2aac50a0149..dbf066129a04 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -40,7 +40,8 @@ #include #include #include -#include + +#include /* * Register definitions, as well as various shifts and masks to get at the @@ -376,6 +377,7 @@ struct ak8975_data { wait_queue_head_t data_ready_queue; unsigned long flags; u8 cntl_cache; + struct iio_mount_matrix orientation; struct regulator *vdd; }; @@ -726,6 +728,18 @@ static int ak8975_read_raw(struct iio_dev *indio_dev, return -EINVAL; } +static const struct iio_mount_matrix * +ak8975_get_mount_matrix(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + return &((struct ak8975_data *)iio_priv(indio_dev))->orientation; +} + +static const struct iio_chan_spec_ext_info ak8975_ext_info[] = { + IIO_MOUNT_MATRIX(IIO_SHARED_BY_DIR, ak8975_get_mount_matrix), + { }, +}; + #define AK8975_CHANNEL(axis, index) \ { \ .type = IIO_MAGN, \ @@ -740,7 +754,8 @@ static int ak8975_read_raw(struct iio_dev *indio_dev, .realbits = 16, \ .storagebits = 16, \ .endianness = IIO_CPU \ - } \ + }, \ + .ext_info = ak8975_ext_info, \ } static const struct iio_chan_spec ak8975_channels[] = { @@ -837,10 +852,12 @@ static int ak8975_probe(struct i2c_client *client, int err; const char *name = NULL; enum asahi_compass_chipset chipset; + const struct ak8975_platform_data *pdata = + dev_get_platdata(&client->dev); /* Grab and set up the supplied GPIO. */ - if (client->dev.platform_data) - eoc_gpio = *(int *)(client->dev.platform_data); + if (pdata) + eoc_gpio = pdata->eoc_gpio; else if (client->dev.of_node) eoc_gpio = of_get_gpio(client->dev.of_node, 0); else @@ -874,6 +891,15 @@ static int ak8975_probe(struct i2c_client *client, data->eoc_gpio = eoc_gpio; data->eoc_irq = 0; + if (!pdata) { + err = of_iio_read_mount_matrix(&client->dev, + "mount-matrix", + &data->orientation); + if (err) + return err; + } else + data->orientation = pdata->orientation; + /* id will be NULL when enumerated via ACPI */ if (id) { chipset = (enum asahi_compass_chipset)(id->driver_data); diff --git a/include/linux/iio/magnetometer/ak8975.h b/include/linux/iio/magnetometer/ak8975.h new file mode 100644 index 000000000000..c8400959d197 --- /dev/null +++ b/include/linux/iio/magnetometer/ak8975.h @@ -0,0 +1,16 @@ +#ifndef __IIO_MAGNETOMETER_AK8975_H__ +#define __IIO_MAGNETOMETER_AK8975_H__ + +#include + +/** + * struct ak8975_platform_data - AK8975 magnetometer driver platform data + * @eoc_gpio: data ready event gpio + * @orientation: mounting matrix relative to main hardware + */ +struct ak8975_platform_data { + int eoc_gpio; + struct iio_mount_matrix orientation; +}; + +#endif -- cgit v1.2.3 From eb3798463f71afc77abd25b2f62708be06f7173b Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 20 Apr 2016 19:23:45 +0200 Subject: iio:imu:mpu6050: enhance mounting matrix support Add a new rotation matrix sysfs attribute compliant with IIO core mounting matrix API. Matrix is retrieved from "in_anglvel_mount_matrix" and "in_accel_mount_matrix" sysfs attributes. It is declared into mpu6050 DTS entry as a "mount-matrix" property. Old interface is kept for backward userspace compatibility and may be retrieved from legacy platform_data mechanism only. Signed-off-by: Gregor Boirie Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 2 ++ .../devicetree/bindings/iio/imu/inv_mpu6050.txt | 13 ++++++++ drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 36 ++++++++++++++++++++-- drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h | 4 ++- include/linux/platform_data/invensense_mpu6050.h | 5 ++- 5 files changed, 55 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index ba8df69d40b0..df44998e7506 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1516,6 +1516,8 @@ Description: What: /sys/bus/iio/devices/iio:deviceX/mount_matrix What: /sys/bus/iio/devices/iio:deviceX/in_mount_matrix What: /sys/bus/iio/devices/iio:deviceX/out_mount_matrix +What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_mount_matrix +What: /sys/bus/iio/devices/iio:deviceX/in_accel_mount_matrix KernelVersion: 4.6 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt index e4d8f1c52f4a..a9fc11e43b45 100644 --- a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt +++ b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt @@ -8,10 +8,23 @@ Required properties: - interrupt-parent : should be the phandle for the interrupt controller - interrupts : interrupt mapping for GPIO IRQ +Optional properties: + - mount-matrix: an optional 3x3 mounting rotation matrix + + Example: mpu6050@68 { compatible = "invensense,mpu6050"; reg = <0x68>; interrupt-parent = <&gpio1>; interrupts = <18 1>; + mount-matrix = "-0.984807753012208", /* x0 */ + "0", /* y0 */ + "-0.173648177666930", /* z0 */ + "0", /* x1 */ + "-1", /* y1 */ + "0", /* z1 */ + "-0.173648177666930", /* x2 */ + "0", /* y2 */ + "0.984807753012208"; /* z2 */ }; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index d192953e9a38..482a2490c53a 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -600,6 +600,10 @@ inv_fifo_rate_show(struct device *dev, struct device_attribute *attr, /** * inv_attr_show() - calling this function will show current * parameters. + * + * Deprecated in favor of IIO mounting matrix API. + * + * See inv_get_mount_matrix() */ static ssize_t inv_attr_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -644,6 +648,18 @@ static int inv_mpu6050_validate_trigger(struct iio_dev *indio_dev, return 0; } +static const struct iio_mount_matrix * +inv_get_mount_matrix(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + return &((struct inv_mpu6050_state *)iio_priv(indio_dev))->orientation; +} + +static const struct iio_chan_spec_ext_info inv_ext_info[] = { + IIO_MOUNT_MATRIX(IIO_SHARED_BY_TYPE, inv_get_mount_matrix), + { }, +}; + #define INV_MPU6050_CHAN(_type, _channel2, _index) \ { \ .type = _type, \ @@ -660,6 +676,7 @@ static int inv_mpu6050_validate_trigger(struct iio_dev *indio_dev, .shift = 0, \ .endianness = IIO_BE, \ }, \ + .ext_info = inv_ext_info, \ } static const struct iio_chan_spec inv_mpu_channels[] = { @@ -692,14 +709,16 @@ static IIO_CONST_ATTR(in_accel_scale_available, "0.000598 0.001196 0.002392 0.004785"); static IIO_DEV_ATTR_SAMP_FREQ(S_IRUGO | S_IWUSR, inv_fifo_rate_show, inv_mpu6050_fifo_rate_store); + +/* Deprecated: kept for userspace backward compatibility. */ static IIO_DEVICE_ATTR(in_gyro_matrix, S_IRUGO, inv_attr_show, NULL, ATTR_GYRO_MATRIX); static IIO_DEVICE_ATTR(in_accel_matrix, S_IRUGO, inv_attr_show, NULL, ATTR_ACCL_MATRIX); static struct attribute *inv_attributes[] = { - &iio_dev_attr_in_gyro_matrix.dev_attr.attr, - &iio_dev_attr_in_accel_matrix.dev_attr.attr, + &iio_dev_attr_in_gyro_matrix.dev_attr.attr, /* deprecated */ + &iio_dev_attr_in_accel_matrix.dev_attr.attr, /* deprecated */ &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_const_attr_sampling_frequency_available.dev_attr.attr, &iio_const_attr_in_accel_scale_available.dev_attr.attr, @@ -779,9 +798,20 @@ int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, st->powerup_count = 0; st->irq = irq; st->map = regmap; + pdata = dev_get_platdata(dev); - if (pdata) + if (!pdata) { + result = of_iio_read_mount_matrix(dev, "mount-matrix", + &st->orientation); + if (result) { + dev_err(dev, "Failed to retrieve mounting matrix %d\n", + result); + return result; + } + } else { st->plat_data = *pdata; + } + /* power is turned on inside check chip type*/ result = inv_check_and_setup_chip(st); if (result) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index e302a49703bf..52d60cdc9f16 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -114,7 +114,8 @@ struct inv_mpu6050_hw { * @hw: Other hardware-specific information. * @chip_type: chip type. * @time_stamp_lock: spin lock to time stamp. - * @plat_data: platform data. + * @plat_data: platform data (deprecated in favor of @orientation). + * @orientation: sensor chip orientation relative to main hardware. * @timestamps: kfifo queue to store time stamp. * @map regmap pointer. * @irq interrupt number. @@ -131,6 +132,7 @@ struct inv_mpu6050_state { struct i2c_client *mux_client; unsigned int powerup_count; struct inv_mpu6050_platform_data plat_data; + struct iio_mount_matrix orientation; DECLARE_KFIFO(timestamps, long long, TIMESTAMP_FIFO_SIZE); struct regmap *map; int irq; diff --git a/include/linux/platform_data/invensense_mpu6050.h b/include/linux/platform_data/invensense_mpu6050.h index ad3aa7b95f35..554b59801aa8 100644 --- a/include/linux/platform_data/invensense_mpu6050.h +++ b/include/linux/platform_data/invensense_mpu6050.h @@ -16,13 +16,16 @@ /** * struct inv_mpu6050_platform_data - Platform data for the mpu driver - * @orientation: Orientation matrix of the chip + * @orientation: Orientation matrix of the chip (deprecated in favor of + * mounting matrix retrieved from device-tree) * * Contains platform specific information on how to configure the MPU6050 to * work on this platform. The orientation matricies are 3x3 rotation matricies * that are applied to the data to rotate from the mounting orientation to the * platform orientation. The values must be one of 0, 1, or -1 and each row and * column should have exactly 1 non-zero value. + * + * Deprecated in favor of mounting matrix retrieved from device-tree. */ struct inv_mpu6050_platform_data { __s8 orientation[9]; -- cgit v1.2.3 From e9bbe898cbe89b17ad3993c136aa13d0431cd537 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 22 Apr 2016 17:31:19 +0200 Subject: libnl: nla_put_net64(): align on a 64-bit area nla_data() is now aligned on a 64-bit area. The temporary function nla_put_be64_32bit() is removed in this patch. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netfilter/ipset/ip_set.h | 9 ++++++--- include/net/netlink.h | 14 ++++++-------- include/uapi/linux/netfilter/ipset/ip_set.h | 1 + 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f48b8a664b0f..83b9a2e0d8d4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -351,7 +351,8 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) return ((skbinfo->skbmark || skbinfo->skbmarkmask) && nla_put_net64(skb, IPSET_ATTR_SKBMARK, cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask))) || + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || (skbinfo->skbprio && nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || @@ -374,9 +375,11 @@ static inline bool ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) { return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter))) || + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter))); + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); } static inline void diff --git a/include/net/netlink.h b/include/net/netlink.h index 47d7d1356fa3..066a921e7cbe 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -868,20 +868,18 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); } -static inline int nla_put_be64_32bit(struct sk_buff *skb, int attrtype, - __be64 value) -{ - return nla_put(skb, attrtype, sizeof(__be64), &value); -} /** - * nla_put_net64 - Add 64-bit network byte order netlink attribute to a socket buffer + * nla_put_net64 - Add 64-bit network byte order nlattr to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value) +static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, + int padattr) { - return nla_put_be64_32bit(skb, attrtype | NLA_F_NET_BYTEORDER, value); + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + padattr); } /** diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 63b2e34f1b60..ebb5154976de 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -118,6 +118,7 @@ enum { IPSET_ATTR_SKBMARK, IPSET_ATTR_SKBPRIO, IPSET_ATTR_SKBQUEUE, + IPSET_ATTR_PAD, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) -- cgit v1.2.3 From 41617e1a8dec9fe082ba5dec26bacb154eb55482 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 24 Apr 2016 00:56:07 -0400 Subject: jbd2: add support for avoiding data writes during transaction commits Currently when filesystem needs to make sure data is on permanent storage before committing a transaction it adds inode to transaction's inode list. During transaction commit, jbd2 writes back all dirty buffers that have allocated underlying blocks and waits for the IO to finish. However when doing writeback for delayed allocated data, we allocate blocks and immediately submit the data. Thus asking jbd2 to write dirty pages just unnecessarily adds more work to jbd2 possibly writing back other redirtied blocks. Add support to jbd2 to allow filesystem to ask jbd2 to only wait for outstanding data writes before committing a transaction and thus avoid unnecessary writes. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.h | 3 ++- fs/jbd2/commit.c | 4 ++++ fs/jbd2/journal.c | 3 ++- fs/jbd2/transaction.c | 22 ++++++++++++++++++---- fs/ocfs2/journal.h | 2 +- include/linux/jbd2.h | 13 +++++++++++-- 6 files changed, 38 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5f5846211095..f1c940b38b30 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -362,7 +362,8 @@ static inline int ext4_journal_force_commit(journal_t *journal) static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) { if (ext4_handle_valid(handle)) - return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode); + return jbd2_journal_inode_add_write(handle, + EXT4_I(inode)->jinode); return 0; } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 2ad98d6e19f4..70078096117d 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -219,6 +219,8 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + if (!(jinode->i_flags & JI_WRITE_DATA)) + continue; mapping = jinode->i_vfs_inode->i_mapping; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); @@ -256,6 +258,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + if (!(jinode->i_flags & JI_WAIT_DATA)) + continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 435f0b26ac20..b31852f76f46 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -94,7 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page); EXPORT_SYMBOL(jbd2_journal_invalidatepage); EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); -EXPORT_SYMBOL(jbd2_journal_file_inode); +EXPORT_SYMBOL(jbd2_journal_inode_add_write); +EXPORT_SYMBOL(jbd2_journal_inode_add_wait); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 67c103867bf8..be56c8ca34c2 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2462,7 +2462,8 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) /* * File inode in the inode list of the handle's transaction */ -int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) +static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, + unsigned long flags) { transaction_t *transaction = handle->h_transaction; journal_t *journal; @@ -2487,12 +2488,14 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) * and if jinode->i_next_transaction == transaction, commit code * will only file the inode where we want it. */ - if (jinode->i_transaction == transaction || - jinode->i_next_transaction == transaction) + if ((jinode->i_transaction == transaction || + jinode->i_next_transaction == transaction) && + (jinode->i_flags & flags) == flags) return 0; spin_lock(&journal->j_list_lock); - + jinode->i_flags |= flags; + /* Is inode already attached where we need it? */ if (jinode->i_transaction == transaction || jinode->i_next_transaction == transaction) goto done; @@ -2523,6 +2526,17 @@ done: return 0; } +int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) +{ + return jbd2_journal_file_inode(handle, jinode, + JI_WRITE_DATA | JI_WAIT_DATA); +} + +int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) +{ + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); +} + /* * File truncate and transaction commit interact with each other in a * non-trivial way. If a transaction writing data block A is diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index f4cd3c3e9fb7..497a4171ef61 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -619,7 +619,7 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) { - return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode); + return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode); } static inline int ocfs2_begin_ordered_truncate(struct inode *inode, diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd1083c46c61..39511484ad10 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -403,11 +403,19 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) /* Flags in jbd_inode->i_flags */ #define __JI_COMMIT_RUNNING 0 -/* Commit of the inode data in progress. We use this flag to protect us from +#define __JI_WRITE_DATA 1 +#define __JI_WAIT_DATA 2 + +/* + * Commit of the inode data in progress. We use this flag to protect us from * concurrent deletion of inode. We cannot use reference to inode for this * since we cannot afford doing last iput() on behalf of kjournald */ #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) +/* Write allocated dirty buffers in this inode before commit */ +#define JI_WRITE_DATA (1 << __JI_WRITE_DATA) +/* Wait for outstanding data writes for this inode before commit */ +#define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** * struct jbd_inode is the structure linking inodes in ordered mode @@ -1270,7 +1278,8 @@ extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit_nested(journal_t *); -extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); -- cgit v1.2.3 From 4b1a9e6934ec6e38138c66c2f73cf6f3695a9c6c Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 21 Apr 2016 05:58:44 -0700 Subject: arm64/perf: Filter common events based on PMCEIDn_EL0 The complete common architectural and micro-architectural event number structure is filtered based on PMCEIDn_EL0 and exposed to /sys using is_visibile function pointer in events attribute_group. To filter the events in is_visible function, pmceid based bitmap is stored in arm_pmu structure and the id field from perf_pmu_events_attr is used to check against the bitmap. The function which derives event bitmap from PMCEIDn_EL0 is executed in the cpus, which has the pmu being initialized, for heterogeneous pmu support. Acked-by: Mark Rutland Signed-off-by: Ashok Kumar Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 70 +++++++++++++++++++++++++++++++++--------- include/linux/perf/arm_pmu.h | 2 ++ 2 files changed, 57 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 946ce4badb8e..e6a0fdb2538d 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -326,10 +326,22 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; + +static ssize_t +armv8pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%03llx\n", pmu_attr->id); +} + #define ARMV8_EVENT_ATTR_RESOLVE(m) #m #define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR_STRING(name, armv8_event_attr_##name, \ - "event=" ARMV8_EVENT_ATTR_RESOLVE(config)) + PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ + config, armv8pmu_events_sysfs_show) ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR); ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL); @@ -434,9 +446,27 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { NULL, }; +static umode_t +armv8pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) + return attr->mode; + + return 0; +} + static struct attribute_group armv8_pmuv3_events_attr_group = { .name = "events", .attrs = armv8_pmuv3_event_attrs, + .is_visible = armv8pmu_event_attr_is_visible, }; PMU_FORMAT_ATTR(event, "config:0-9"); @@ -859,22 +889,31 @@ static int armv8_thunder_map_event(struct perf_event *event) ARMV8_PMU_EVTYPE_EVENT); } -static void armv8pmu_read_num_pmnc_events(void *info) +static void __armv8pmu_probe_pmu(void *info) { - int *nb_cnt = info; + struct arm_pmu *cpu_pmu = info; + u32 pmceid[2]; /* Read the nb of CNTx counters supported from PMNC */ - *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) + & ARMV8_PMU_PMCR_N_MASK; /* Add the CPU cycles counter */ - *nb_cnt += 1; + cpu_pmu->num_events += 1; + + pmceid[0] = read_sysreg(pmceid0_el0); + pmceid[1] = read_sysreg(pmceid1_el0); + + bitmap_from_u32array(cpu_pmu->pmceid_bitmap, + ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid, + ARRAY_SIZE(pmceid)); } -static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu) +static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) { - return smp_call_function_any(&arm_pmu->supported_cpus, - armv8pmu_read_num_pmnc_events, - &arm_pmu->num_events, 1); + return smp_call_function_any(&cpu_pmu->supported_cpus, + __armv8pmu_probe_pmu, + cpu_pmu, 1); } static void armv8_pmu_init(struct arm_pmu *cpu_pmu) @@ -897,7 +936,8 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; - return armv8pmu_probe_num_events(cpu_pmu); + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) @@ -906,7 +946,7 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) @@ -915,7 +955,7 @@ static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) @@ -924,7 +964,7 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) @@ -933,7 +973,7 @@ static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; - return armv8pmu_probe_num_events(cpu_pmu); + return armv8pmu_probe_pmu(cpu_pmu); } static const struct of_device_id armv8_pmu_of_device_ids[] = { diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4196c90a3c88..d28ac05c7f92 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -105,6 +105,8 @@ struct arm_pmu { struct mutex reserve_mutex; u64 max_period; bool secure_access; /* 32-bit ARM only */ +#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 + DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; -- cgit v1.2.3 From 642aa8cee7b84eee1dc4897e301611cffa6d5775 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 Apr 2016 14:28:55 +0530 Subject: PM / OPP: dev_pm_opp_set_sharing_cpus() doesn't depend on CONFIG_OF dev_pm_opp_set_sharing_cpus() doesn't do any DT specific stuff and its declarations are added within the CONFIG_OF ifdef by mistake. Take them out of that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cccaf4a29e9f..5b6ad31403a5 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -65,6 +65,7 @@ void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -178,6 +179,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f return -EINVAL; } +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +{ + return -ENOSYS; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) @@ -186,7 +192,6 @@ void dev_pm_opp_of_remove_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask); void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask); int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); -int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -210,11 +215,6 @@ static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask { return -ENOSYS; } - -static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) -{ - return -ENOSYS; -} #endif #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 7ba2f2757d84eae533679306f03c93c118437a87 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 23 Apr 2016 22:47:08 +0200 Subject: spi: core: add hook flash_read_supported to spi_master If hook spi_flash_read is implemented the fast flash read feature is enabled for all devices attached to the respective master. In most cases there is just one flash chip, however there are also devices with more than one flash chip, namely some WiFi routers. Then the fast flash read feature can be used for the first chip only. OpenWRT implemented an own handling of this case, using controller_data element of spi_device to hold the information whether fast flash read can be used for a device. This patch adds hook flash_read_supported to spi_master which is used to extend spi_flash_read_supported() by checking whether the fast flash read feature can be used for a specific spi_device. If the hook is not implemented the default behavior is to allow fast flash read for all devices (if spi_flash_read is implemented). Signed-off-by: Heiner Kallweit Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 857a9a1d82b5..1f03483f61e5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -372,6 +372,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @unprepare_message: undo any work done by prepare_message(). * @spi_flash_read: to support spi-controller hardwares that provide * accelerated interface to read from flash devices. + * @flash_read_supported: spi device supports flash read * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). @@ -529,6 +530,7 @@ struct spi_master { struct spi_message *message); int (*spi_flash_read)(struct spi_device *spi, struct spi_flash_read_message *msg); + bool (*flash_read_supported)(struct spi_device *spi); /* * These hooks are for drivers that use a generic implementation @@ -1158,7 +1160,9 @@ struct spi_flash_read_message { /* SPI core interface for flash read support */ static inline bool spi_flash_read_supported(struct spi_device *spi) { - return spi->master->spi_flash_read ? true : false; + return spi->master->spi_flash_read && + (!spi->master->flash_read_supported || + spi->master->flash_read_supported(spi)); } int spi_flash_read(struct spi_device *spi, -- cgit v1.2.3 From a558da0916b90c330940a106105d0a6a67cb77f7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 25 Apr 2016 10:25:20 +0200 Subject: ieee802154: use nla_put_u64_64bit() Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/nl802154.h | 2 ++ net/ieee802154/nl-mac.c | 17 +++++++++++------ net/ieee802154/nl802154.c | 3 ++- 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 167342c2ce6b..0f6f6607f592 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -92,6 +92,8 @@ enum { IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, + IEEE802154_ATTR_PAD, + __IEEE802154_ATTR_MAX, }; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 3503c38954f9..d3cbb3258718 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -34,9 +34,11 @@ #include "ieee802154.h" -static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr) +static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr, + int padattr) { - return nla_put_u64(msg, type, swab64((__force u64)hwaddr)); + return nla_put_u64_64bit(msg, type, swab64((__force u64)hwaddr), + padattr); } static __le64 nla_get_hwaddr(const struct nlattr *nla) @@ -623,7 +625,8 @@ ieee802154_llsec_fill_key_id(struct sk_buff *msg, if (desc->device_addr.mode == IEEE802154_ADDR_LONG && nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, - desc->device_addr.extended_addr)) + desc->device_addr.extended_addr, + IEEE802154_ATTR_PAD)) return -EMSGSIZE; } @@ -638,7 +641,7 @@ ieee802154_llsec_fill_key_id(struct sk_buff *msg, if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, - desc->extended_source)) + desc->extended_source, IEEE802154_ATTR_PAD)) return -EMSGSIZE; return 0; @@ -1063,7 +1066,8 @@ ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, desc->short_addr) || - nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr, + IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, desc->frame_counter) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, @@ -1167,7 +1171,8 @@ ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || - nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr, + IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, devkey->frame_counter) || ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 614072064d03..8035c93dd527 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -813,7 +813,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx) || nla_put_u32(msg, NL802154_ATTR_IFTYPE, wpan_dev->iftype) || - nla_put_u64(msg, NL802154_ATTR_WPAN_DEV, wpan_dev_id(wpan_dev)) || + nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV, + wpan_dev_id(wpan_dev), NL802154_ATTR_PAD) || nla_put_u32(msg, NL802154_ATTR_GENERATION, rdev->devlist_generation ^ (cfg802154_rdev_list_generation << 2))) -- cgit v1.2.3 From d4967cf38fbd62467b8fb5cab63d7da1f5907ed7 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 22 Apr 2016 08:41:01 +0300 Subject: qed*: Align statistics names There's a difference in statsitics' names starting at qed and propagating to qede, where egress counters indicate ranges while ingress counters indiciate high-end. Align all statistcs to follow the same conventions - name indicates range. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 20 ++++++++--------- drivers/net/ethernet/qlogic/qede/qede.h | 20 ++++++++--------- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 20 ++++++++--------- drivers/net/ethernet/qlogic/qede/qede_main.c | 29 ++++++++++++++++--------- include/linux/qed/qed_if.h | 20 ++++++++--------- 5 files changed, 59 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index fb5f3b815340..31e1d510a991 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1415,16 +1415,16 @@ static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, sizeof(port_stats)); p_stats->rx_64_byte_packets += port_stats.pmm.r64; - p_stats->rx_127_byte_packets += port_stats.pmm.r127; - p_stats->rx_255_byte_packets += port_stats.pmm.r255; - p_stats->rx_511_byte_packets += port_stats.pmm.r511; - p_stats->rx_1023_byte_packets += port_stats.pmm.r1023; - p_stats->rx_1518_byte_packets += port_stats.pmm.r1518; - p_stats->rx_1522_byte_packets += port_stats.pmm.r1522; - p_stats->rx_2047_byte_packets += port_stats.pmm.r2047; - p_stats->rx_4095_byte_packets += port_stats.pmm.r4095; - p_stats->rx_9216_byte_packets += port_stats.pmm.r9216; - p_stats->rx_16383_byte_packets += port_stats.pmm.r16383; + p_stats->rx_65_to_127_byte_packets += port_stats.pmm.r127; + p_stats->rx_128_to_255_byte_packets += port_stats.pmm.r255; + p_stats->rx_256_to_511_byte_packets += port_stats.pmm.r511; + p_stats->rx_512_to_1023_byte_packets += port_stats.pmm.r1023; + p_stats->rx_1024_to_1518_byte_packets += port_stats.pmm.r1518; + p_stats->rx_1519_to_1522_byte_packets += port_stats.pmm.r1522; + p_stats->rx_1519_to_2047_byte_packets += port_stats.pmm.r2047; + p_stats->rx_2048_to_4095_byte_packets += port_stats.pmm.r4095; + p_stats->rx_4096_to_9216_byte_packets += port_stats.pmm.r9216; + p_stats->rx_9217_to_16383_byte_packets += port_stats.pmm.r16383; p_stats->rx_crc_errors += port_stats.pmm.rfcs; p_stats->rx_mac_crtl_frames += port_stats.pmm.rxcf; p_stats->rx_pause_frames += port_stats.pmm.rxpf; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 16df1591388f..a687e7a1dc8d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -59,16 +59,16 @@ struct qede_stats { /* port */ u64 rx_64_byte_packets; - u64 rx_127_byte_packets; - u64 rx_255_byte_packets; - u64 rx_511_byte_packets; - u64 rx_1023_byte_packets; - u64 rx_1518_byte_packets; - u64 rx_1522_byte_packets; - u64 rx_2047_byte_packets; - u64 rx_4095_byte_packets; - u64 rx_9216_byte_packets; - u64 rx_16383_byte_packets; + u64 rx_65_to_127_byte_packets; + u64 rx_128_to_255_byte_packets; + u64 rx_256_to_511_byte_packets; + u64 rx_512_to_1023_byte_packets; + u64 rx_1024_to_1518_byte_packets; + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f0982f163670..f87e83b41d5d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -59,16 +59,16 @@ static const struct { QEDE_STAT(tx_bcast_pkts), QEDE_PF_STAT(rx_64_byte_packets), - QEDE_PF_STAT(rx_127_byte_packets), - QEDE_PF_STAT(rx_255_byte_packets), - QEDE_PF_STAT(rx_511_byte_packets), - QEDE_PF_STAT(rx_1023_byte_packets), - QEDE_PF_STAT(rx_1518_byte_packets), - QEDE_PF_STAT(rx_1522_byte_packets), - QEDE_PF_STAT(rx_2047_byte_packets), - QEDE_PF_STAT(rx_4095_byte_packets), - QEDE_PF_STAT(rx_9216_byte_packets), - QEDE_PF_STAT(rx_16383_byte_packets), + QEDE_PF_STAT(rx_65_to_127_byte_packets), + QEDE_PF_STAT(rx_128_to_255_byte_packets), + QEDE_PF_STAT(rx_256_to_511_byte_packets), + QEDE_PF_STAT(rx_512_to_1023_byte_packets), + QEDE_PF_STAT(rx_1024_to_1518_byte_packets), + QEDE_PF_STAT(rx_1519_to_1522_byte_packets), + QEDE_PF_STAT(rx_1519_to_2047_byte_packets), + QEDE_PF_STAT(rx_2048_to_4095_byte_packets), + QEDE_PF_STAT(rx_4096_to_9216_byte_packets), + QEDE_PF_STAT(rx_9217_to_16383_byte_packets), QEDE_PF_STAT(tx_64_byte_packets), QEDE_PF_STAT(tx_65_to_127_byte_packets), QEDE_PF_STAT(tx_128_to_255_byte_packets), diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 197ef85684da..1e3ee49bae24 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1638,16 +1638,25 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->stats.coalesced_bytes = stats.tpa_coalesced_bytes; edev->stats.rx_64_byte_packets = stats.rx_64_byte_packets; - edev->stats.rx_127_byte_packets = stats.rx_127_byte_packets; - edev->stats.rx_255_byte_packets = stats.rx_255_byte_packets; - edev->stats.rx_511_byte_packets = stats.rx_511_byte_packets; - edev->stats.rx_1023_byte_packets = stats.rx_1023_byte_packets; - edev->stats.rx_1518_byte_packets = stats.rx_1518_byte_packets; - edev->stats.rx_1522_byte_packets = stats.rx_1522_byte_packets; - edev->stats.rx_2047_byte_packets = stats.rx_2047_byte_packets; - edev->stats.rx_4095_byte_packets = stats.rx_4095_byte_packets; - edev->stats.rx_9216_byte_packets = stats.rx_9216_byte_packets; - edev->stats.rx_16383_byte_packets = stats.rx_16383_byte_packets; + edev->stats.rx_65_to_127_byte_packets = stats.rx_65_to_127_byte_packets; + edev->stats.rx_128_to_255_byte_packets = + stats.rx_128_to_255_byte_packets; + edev->stats.rx_256_to_511_byte_packets = + stats.rx_256_to_511_byte_packets; + edev->stats.rx_512_to_1023_byte_packets = + stats.rx_512_to_1023_byte_packets; + edev->stats.rx_1024_to_1518_byte_packets = + stats.rx_1024_to_1518_byte_packets; + edev->stats.rx_1519_to_1522_byte_packets = + stats.rx_1519_to_1522_byte_packets; + edev->stats.rx_1519_to_2047_byte_packets = + stats.rx_1519_to_2047_byte_packets; + edev->stats.rx_2048_to_4095_byte_packets = + stats.rx_2048_to_4095_byte_packets; + edev->stats.rx_4096_to_9216_byte_packets = + stats.rx_4096_to_9216_byte_packets; + edev->stats.rx_9217_to_16383_byte_packets = + stats.rx_9217_to_16383_byte_packets; edev->stats.rx_crc_errors = stats.rx_crc_errors; edev->stats.rx_mac_crtl_frames = stats.rx_mac_crtl_frames; edev->stats.rx_pause_frames = stats.rx_pause_frames; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 67e8c206b2c1..82a7fe011068 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -384,16 +384,16 @@ struct qed_eth_stats { /* port */ u64 rx_64_byte_packets; - u64 rx_127_byte_packets; - u64 rx_255_byte_packets; - u64 rx_511_byte_packets; - u64 rx_1023_byte_packets; - u64 rx_1518_byte_packets; - u64 rx_1522_byte_packets; - u64 rx_2047_byte_packets; - u64 rx_4095_byte_packets; - u64 rx_9216_byte_packets; - u64 rx_16383_byte_packets; + u64 rx_65_to_127_byte_packets; + u64 rx_128_to_255_byte_packets; + u64 rx_256_to_511_byte_packets; + u64 rx_512_to_1023_byte_packets; + u64 rx_1024_to_1518_byte_packets; + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; -- cgit v1.2.3 From fe7cd2bfdac4d8739bc8665eef040e668e6b428f Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 22 Apr 2016 08:41:03 +0300 Subject: qed*: Conditions for changing link There's some inconsistency in current logic determining whether the link settings of a given interface can be changed; I.e., in all modes other than the so-called `deault' mode the interfaces are forbidden from changing the configuration - but even this rule is not applied to all user APIs that may change the configuration. Instead, let the core-module [qed] decide whether an interface can change the configuration by supporting a new API function. We also revise the current rule, allowing all interfaces to change their configurations while laying the infrastructure for future modes where an interface would be blocked from making such a configuration. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 6 ++++++ drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 14 ++++++++++---- include/linux/qed/qed_if.h | 10 ++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 1e9f321f1ac4..d189871e8e23 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -915,6 +915,11 @@ static u32 qed_sb_release(struct qed_dev *cdev, return rc; } +static bool qed_can_link_change(struct qed_dev *cdev) +{ + return true; +} + static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) { @@ -1177,6 +1182,7 @@ const struct qed_common_ops qed_common_ops_pass = { .sb_release = &qed_sb_release, .simd_handler_config = &qed_simd_handler_config, .simd_handler_clean = &qed_simd_handler_clean, + .can_link_change = &qed_can_link_change, .set_link = &qed_set_link, .get_link = &qed_get_current_link, .drain = &qed_drain, diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 2ac98d44c1e1..f1dd25ac5552 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -239,9 +239,9 @@ static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct qed_link_params params; u32 speed; - if (!edev->dev_info.common.is_mf_default) { + if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { DP_INFO(edev, - "Link parameters can not be changed in non-default mode\n"); + "Link settings are not allowed to be changed\n"); return -EOPNOTSUPP; } @@ -350,6 +350,12 @@ static int qede_nway_reset(struct net_device *dev) struct qed_link_output current_link; struct qed_link_params link_params; + if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { + DP_INFO(edev, + "Link settings are not allowed to be changed\n"); + return -EOPNOTSUPP; + } + if (!netif_running(dev)) return 0; @@ -450,9 +456,9 @@ static int qede_set_pauseparam(struct net_device *dev, struct qed_link_params params; struct qed_link_output current_link; - if (!edev->dev_info.common.is_mf_default) { + if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { DP_INFO(edev, - "Pause parameters can not be updated in non-default mode\n"); + "Pause settings are not allowed to be changed\n"); return -EOPNOTSUPP; } diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 82a7fe011068..e5de42b62976 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -211,6 +211,16 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + +/** + * @brief can_link_change - can the instance change the link or not + * + * @param cdev + * + * @return true if link-change is allowed, false otherwise. + */ + bool (*can_link_change)(struct qed_dev *cdev); + /** * @brief set_link - set links according to params * -- cgit v1.2.3 From 6fa01ccd883021105e9f8af7d04b9f156fa3494a Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 22 Apr 2016 18:36:35 -0700 Subject: skbuff: Add pskb_extract() helper function A pattern of skb usage seen in modules such as RDS-TCP is to extract `to_copy' bytes from the received TCP segment, starting at some offset `off' into a new skb `clone'. This is done in the ->data_ready callback, where the clone skb is queued up for rx on the PF_RDS socket, while the parent TCP segment is returned unchanged back to the TCP engine. The existing code uses the sequence clone = skb_clone(..); pskb_pull(clone, off, ..); pskb_trim(clone, to_copy, ..); with the intention of discarding the first `off' bytes. However, skb_clone() + pskb_pull() implies pksb_expand_head(), which ends up doing a redundant memcpy of bytes that will then get discarded in __pskb_pull_tail(). To avoid this inefficiency, this commit adds pskb_extract() that creates the clone, and memcpy's only the relevant header/frag/frag_list to the start of `clone'. pskb_trim() is then invoked to trim clone down to the requested to_copy bytes. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 + net/core/skbuff.c | 242 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da0ace389fec..a1ce63979ad8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2986,6 +2986,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); +struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, + gfp_t gfp); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7ff7788b0151..7a1d48983f81 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4622,3 +4622,245 @@ failure: return NULL; } EXPORT_SYMBOL(alloc_skb_with_frags); + +/* carve out the first off bytes from skb when off < headlen */ +static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, + const int headlen, gfp_t gfp_mask) +{ + int i; + int size = skb_end_offset(skb); + int new_hlen = headlen - off; + u8 *data; + int doff = 0; + + size = SKB_DATA_ALIGN(size); + + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); + if (!data) + return -ENOMEM; + + size = SKB_WITH_OVERHEAD(ksize(data)); + + /* Copy real data, and all frags */ + skb_copy_from_linear_data_offset(skb, off, data, new_hlen); + skb->len -= off; + + memcpy((struct skb_shared_info *)(data + size), + skb_shinfo(skb), + offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + if (skb_cloned(skb)) { + /* drop the old head gracefully */ + if (skb_orphan_frags(skb, gfp_mask)) { + kfree(data); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + skb_frag_ref(skb, i); + if (skb_has_frag_list(skb)) + skb_clone_fraglist(skb); + skb_release_data(skb); + } else { + /* we can reuse existing recount- all we did was + * relocate values + */ + skb_free_head(skb); + } + + doff = (data - skb->head); + skb->head = data; + skb->data = data; + skb->head_frag = 0; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->end = size; + doff = 0; +#else + skb->end = skb->head + size; +#endif + skb_set_tail_pointer(skb, skb_headlen(skb)); + skb_headers_offset_update(skb, 0); + skb->cloned = 0; + skb->hdr_len = 0; + skb->nohdr = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); + + return 0; +} + +static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); + +/* carve out the first eat bytes from skb's frag_list. May recurse into + * pskb_carve() + */ +static int pskb_carve_frag_list(struct sk_buff *skb, + struct skb_shared_info *shinfo, int eat, + gfp_t gfp_mask) +{ + struct sk_buff *list = shinfo->frag_list; + struct sk_buff *clone = NULL; + struct sk_buff *insp = NULL; + + do { + if (!list) { + pr_err("Not enough bytes to eat. Want %d\n", eat); + return -EFAULT; + } + if (list->len <= eat) { + /* Eaten as whole. */ + eat -= list->len; + list = list->next; + insp = list; + } else { + /* Eaten partially. */ + if (skb_shared(list)) { + clone = skb_clone(list, gfp_mask); + if (!clone) + return -ENOMEM; + insp = list->next; + list = clone; + } else { + /* This may be pulled without problems. */ + insp = list; + } + if (pskb_carve(list, eat, gfp_mask) < 0) { + kfree_skb(clone); + return -ENOMEM; + } + break; + } + } while (eat); + + /* Free pulled out fragments. */ + while ((list = shinfo->frag_list) != insp) { + shinfo->frag_list = list->next; + kfree_skb(list); + } + /* And insert new clone at head. */ + if (clone) { + clone->next = list; + shinfo->frag_list = clone; + } + return 0; +} + +/* carve off first len bytes from skb. Split line (off) is in the + * non-linear part of skb + */ +static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, + int pos, gfp_t gfp_mask) +{ + int i, k = 0; + int size = skb_end_offset(skb); + u8 *data; + const int nfrags = skb_shinfo(skb)->nr_frags; + struct skb_shared_info *shinfo; + int doff = 0; + + size = SKB_DATA_ALIGN(size); + + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); + if (!data) + return -ENOMEM; + + size = SKB_WITH_OVERHEAD(ksize(data)); + + memcpy((struct skb_shared_info *)(data + size), + skb_shinfo(skb), offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + if (skb_orphan_frags(skb, gfp_mask)) { + kfree(data); + return -ENOMEM; + } + shinfo = (struct skb_shared_info *)(data + size); + for (i = 0; i < nfrags; i++) { + int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (pos + fsize > off) { + shinfo->frags[k] = skb_shinfo(skb)->frags[i]; + + if (pos < off) { + /* Split frag. + * We have two variants in this case: + * 1. Move all the frag to the second + * part, if it is possible. F.e. + * this approach is mandatory for TUX, + * where splitting is expensive. + * 2. Split is accurately. We make this. + */ + shinfo->frags[0].page_offset += off - pos; + skb_frag_size_sub(&shinfo->frags[0], off - pos); + } + skb_frag_ref(skb, i); + k++; + } + pos += fsize; + } + shinfo->nr_frags = k; + if (skb_has_frag_list(skb)) + skb_clone_fraglist(skb); + + if (k == 0) { + /* split line is in frag list */ + pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask); + } + skb_release_data(skb); + + doff = (data - skb->head); + skb->head = data; + skb->head_frag = 0; + skb->data = data; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->end = size; + doff = 0; +#else + skb->end = skb->head + size; +#endif + skb_reset_tail_pointer(skb); + skb_headers_offset_update(skb, 0); + skb->cloned = 0; + skb->hdr_len = 0; + skb->nohdr = 0; + skb->len -= off; + skb->data_len = skb->len; + atomic_set(&skb_shinfo(skb)->dataref, 1); + return 0; +} + +/* remove len bytes from the beginning of the skb */ +static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp) +{ + int headlen = skb_headlen(skb); + + if (len < headlen) + return pskb_carve_inside_header(skb, len, headlen, gfp); + else + return pskb_carve_inside_nonlinear(skb, len, headlen, gfp); +} + +/* Extract to_copy bytes starting at off from skb, and return this in + * a new skb + */ +struct sk_buff *pskb_extract(struct sk_buff *skb, int off, + int to_copy, gfp_t gfp) +{ + struct sk_buff *clone = skb_clone(skb, gfp); + + if (!clone) + return NULL; + + if (pskb_carve(clone, off, gfp) < 0 || + pskb_trim(clone, to_copy)) { + kfree_skb(clone); + return NULL; + } + return clone; +} +EXPORT_SYMBOL(pskb_extract); -- cgit v1.2.3 From 21e8868e661ededd2c45c8ec27f6fd5354b88b71 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 20 Apr 2016 11:20:28 +0100 Subject: drivers: firmware: psci: make two helper functions static psci_power_state_loses_context() and psci_power_state_is_valid are only used internally now, so make them static. Signed-off-by: Jisheng Zhang Signed-off-by: Lorenzo Pieralisi Signed-off-by: Arnd Bergmann --- drivers/firmware/psci.c | 4 ++-- include/linux/psci.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 0ab477ba6564..04f2ac54e5ab 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -91,7 +91,7 @@ static inline bool psci_has_ext_power_state(void) PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK; } -bool psci_power_state_loses_context(u32 state) +static bool psci_power_state_loses_context(u32 state) { const u32 mask = psci_has_ext_power_state() ? PSCI_1_0_EXT_POWER_STATE_TYPE_MASK : @@ -100,7 +100,7 @@ bool psci_power_state_loses_context(u32 state) return state & mask; } -bool psci_power_state_is_valid(u32 state) +static bool psci_power_state_is_valid(u32 state) { const u32 valid_mask = psci_has_ext_power_state() ? PSCI_1_0_EXT_POWER_STATE_MASK : diff --git a/include/linux/psci.h b/include/linux/psci.h index 393efe2edf9a..bdea1cb5e1db 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,8 +21,6 @@ #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 bool psci_tos_resident_on(int cpu); -bool psci_power_state_loses_context(u32 state); -bool psci_power_state_is_valid(u32 state); int psci_cpu_init_idle(unsigned int cpu); int psci_cpu_suspend_enter(unsigned long index); -- cgit v1.2.3 From 1fcbcc333f1fae6e11cc0839a6e72bc1a3e830bf Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 25 Apr 2016 16:50:14 -0700 Subject: block: copy NOMERGE flag from bio to request bio might have NOMERGE flag set, for example blk_queue_split sets it. When we initiate request, copy this flag too. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 86a38ea1823f..77e5d81f07aa 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -208,7 +208,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ - REQ_SECURE | REQ_INTEGRITY) + REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) -- cgit v1.2.3 From 4899f78a3dccda41ffdaa1a2cbf78209753e0f70 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 6 Apr 2016 12:37:37 -0500 Subject: mailbox/omap: drop legacy platform device support OMAP mailbox devices can no longer be created in legacy non-DT mode, all the relevant code has been cleaned up. The OMAP mailbox driver will only support devices created from DT going forward, so drop the legacy platform device support from the driver. Signed-off-by: Suman Anna Signed-off-by: Jassi Brar --- drivers/mailbox/omap-mailbox.c | 101 ++++++++++++----------------- include/linux/platform_data/mailbox-omap.h | 58 ----------------- 2 files changed, 41 insertions(+), 118 deletions(-) delete mode 100644 include/linux/platform_data/mailbox-omap.h (limited to 'include/linux') diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c index b7f636f15cac..8754d810ef05 100644 --- a/drivers/mailbox/omap-mailbox.c +++ b/drivers/mailbox/omap-mailbox.c @@ -2,7 +2,7 @@ * OMAP mailbox driver * * Copyright (C) 2006-2009 Nokia Corporation. All rights reserved. - * Copyright (C) 2013-2014 Texas Instruments Inc. + * Copyright (C) 2013-2016 Texas Instruments Incorporated - http://www.ti.com * * Contact: Hiroshi DOYU * Suman Anna @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -69,6 +68,10 @@ #define MBOX_NR_REGS (MBOX_REG_SIZE / sizeof(u32)) #define OMAP4_MBOX_NR_REGS (OMAP4_MBOX_REG_SIZE / sizeof(u32)) +/* Interrupt register configuration types */ +#define MBOX_INTR_CFG_TYPE1 0 +#define MBOX_INTR_CFG_TYPE2 1 + struct omap_mbox_fifo { unsigned long msg; unsigned long fifo_stat; @@ -696,8 +699,6 @@ static int omap_mbox_probe(struct platform_device *pdev) int ret; struct mbox_chan *chnls; struct omap_mbox **list, *mbox, *mboxblk; - struct omap_mbox_pdata *pdata = pdev->dev.platform_data; - struct omap_mbox_dev_info *info = NULL; struct omap_mbox_fifo_info *finfo, *finfoblk; struct omap_mbox_device *mdev; struct omap_mbox_fifo *fifo; @@ -710,36 +711,26 @@ static int omap_mbox_probe(struct platform_device *pdev) u32 l; int i; - if (!node && (!pdata || !pdata->info_cnt || !pdata->info)) { - pr_err("%s: platform not supported\n", __func__); + if (!node) { + pr_err("%s: only DT-based devices are supported\n", __func__); return -ENODEV; } - if (node) { - match = of_match_device(omap_mailbox_of_match, &pdev->dev); - if (!match) - return -ENODEV; - intr_type = (u32)match->data; + match = of_match_device(omap_mailbox_of_match, &pdev->dev); + if (!match) + return -ENODEV; + intr_type = (u32)match->data; - if (of_property_read_u32(node, "ti,mbox-num-users", - &num_users)) - return -ENODEV; + if (of_property_read_u32(node, "ti,mbox-num-users", &num_users)) + return -ENODEV; - if (of_property_read_u32(node, "ti,mbox-num-fifos", - &num_fifos)) - return -ENODEV; + if (of_property_read_u32(node, "ti,mbox-num-fifos", &num_fifos)) + return -ENODEV; - info_count = of_get_available_child_count(node); - if (!info_count) { - dev_err(&pdev->dev, "no available mbox devices found\n"); - return -ENODEV; - } - } else { /* non-DT device creation */ - info_count = pdata->info_cnt; - info = pdata->info; - intr_type = pdata->intr_type; - num_users = pdata->num_users; - num_fifos = pdata->num_fifos; + info_count = of_get_available_child_count(node); + if (!info_count) { + dev_err(&pdev->dev, "no available mbox devices found\n"); + return -ENODEV; } finfoblk = devm_kzalloc(&pdev->dev, info_count * sizeof(*finfoblk), @@ -750,38 +741,28 @@ static int omap_mbox_probe(struct platform_device *pdev) finfo = finfoblk; child = NULL; for (i = 0; i < info_count; i++, finfo++) { - if (node) { - child = of_get_next_available_child(node, child); - ret = of_property_read_u32_array(child, "ti,mbox-tx", - tmp, ARRAY_SIZE(tmp)); - if (ret) - return ret; - finfo->tx_id = tmp[0]; - finfo->tx_irq = tmp[1]; - finfo->tx_usr = tmp[2]; - - ret = of_property_read_u32_array(child, "ti,mbox-rx", - tmp, ARRAY_SIZE(tmp)); - if (ret) - return ret; - finfo->rx_id = tmp[0]; - finfo->rx_irq = tmp[1]; - finfo->rx_usr = tmp[2]; - - finfo->name = child->name; - - if (of_find_property(child, "ti,mbox-send-noirq", NULL)) - finfo->send_no_irq = true; - } else { - finfo->tx_id = info->tx_id; - finfo->rx_id = info->rx_id; - finfo->tx_usr = info->usr_id; - finfo->tx_irq = info->irq_id; - finfo->rx_usr = info->usr_id; - finfo->rx_irq = info->irq_id; - finfo->name = info->name; - info++; - } + child = of_get_next_available_child(node, child); + ret = of_property_read_u32_array(child, "ti,mbox-tx", tmp, + ARRAY_SIZE(tmp)); + if (ret) + return ret; + finfo->tx_id = tmp[0]; + finfo->tx_irq = tmp[1]; + finfo->tx_usr = tmp[2]; + + ret = of_property_read_u32_array(child, "ti,mbox-rx", tmp, + ARRAY_SIZE(tmp)); + if (ret) + return ret; + finfo->rx_id = tmp[0]; + finfo->rx_irq = tmp[1]; + finfo->rx_usr = tmp[2]; + + finfo->name = child->name; + + if (of_find_property(child, "ti,mbox-send-noirq", NULL)) + finfo->send_no_irq = true; + if (finfo->tx_id >= num_fifos || finfo->rx_id >= num_fifos || finfo->tx_usr >= num_users || finfo->rx_usr >= num_users) return -EINVAL; diff --git a/include/linux/platform_data/mailbox-omap.h b/include/linux/platform_data/mailbox-omap.h deleted file mode 100644 index 4631dbb4255e..000000000000 --- a/include/linux/platform_data/mailbox-omap.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * mailbox-omap.h - * - * Copyright (C) 2013 Texas Instruments, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _PLAT_MAILBOX_H -#define _PLAT_MAILBOX_H - -/* Interrupt register configuration types */ -#define MBOX_INTR_CFG_TYPE1 (0) -#define MBOX_INTR_CFG_TYPE2 (1) - -/** - * struct omap_mbox_dev_info - OMAP mailbox device attribute info - * @name: name of the mailbox device - * @tx_id: mailbox queue id used for transmitting messages - * @rx_id: mailbox queue id on which messages are received - * @irq_id: irq identifier number to use from the hwmod data - * @usr_id: mailbox user id for identifying the interrupt into - * the MPU interrupt controller. - */ -struct omap_mbox_dev_info { - const char *name; - u32 tx_id; - u32 rx_id; - u32 irq_id; - u32 usr_id; -}; - -/** - * struct omap_mbox_pdata - OMAP mailbox platform data - * @intr_type: type of interrupt configuration registers used - while programming mailbox queue interrupts - * @num_users: number of users (processor devices) that the mailbox - * h/w block can interrupt - * @num_fifos: number of h/w fifos within the mailbox h/w block - * @info_cnt: number of mailbox devices for the platform - * @info: array of mailbox device attributes - */ -struct omap_mbox_pdata { - u32 intr_type; - u32 num_users; - u32 num_fifos; - u32 info_cnt; - struct omap_mbox_dev_info *info; -}; - -#endif /* _PLAT_MAILBOX_H */ -- cgit v1.2.3 From dd28216528cf67339cd4f5854166fbd4eefd7fa8 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 6 Apr 2016 18:37:20 -0500 Subject: mailbox/omap: kill omap_mbox_{save/restore}_ctx() functions The omap_mbox_save_ctx() and omap_mbox_restore_ctx() API were previously provided to OMAP mailbox clients to save and restore the mailbox context during system suspend/resume. The save and restore functionality is now implemented through System PM driver callbacks, and there is no need for these functions, so kill these API. Signed-off-by: Suman Anna Signed-off-by: Jassi Brar --- drivers/mailbox/omap-mailbox.c | 51 ------------------------------------------ include/linux/omap-mailbox.h | 2 -- 2 files changed, 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c index d8d3a4bc5262..c5e8b9cb170d 100644 --- a/drivers/mailbox/omap-mailbox.c +++ b/drivers/mailbox/omap-mailbox.c @@ -55,13 +55,6 @@ #define MAILBOX_IRQ_NEWMSG(m) (1 << (2 * (m))) #define MAILBOX_IRQ_NOTFULL(m) (1 << (2 * (m) + 1)) -#define MBOX_REG_SIZE 0x120 - -#define OMAP4_MBOX_REG_SIZE 0x130 - -#define MBOX_NR_REGS (MBOX_REG_SIZE / sizeof(u32)) -#define OMAP4_MBOX_NR_REGS (OMAP4_MBOX_REG_SIZE / sizeof(u32)) - /* Interrupt register configuration types */ #define MBOX_INTR_CFG_TYPE1 0 #define MBOX_INTR_CFG_TYPE2 1 @@ -118,7 +111,6 @@ struct omap_mbox { struct omap_mbox_device *parent; struct omap_mbox_fifo tx_fifo; struct omap_mbox_fifo rx_fifo; - u32 ctx[OMAP4_MBOX_NR_REGS]; u32 intr_type; struct mbox_chan *chan; bool send_no_irq; @@ -209,49 +201,6 @@ static int is_mbox_irq(struct omap_mbox *mbox, omap_mbox_irq_t irq) return (int)(enable & status & bit); } -void omap_mbox_save_ctx(struct mbox_chan *chan) -{ - int i; - int nr_regs; - struct omap_mbox *mbox = mbox_chan_to_omap_mbox(chan); - - if (WARN_ON(!mbox)) - return; - - if (mbox->intr_type) - nr_regs = OMAP4_MBOX_NR_REGS; - else - nr_regs = MBOX_NR_REGS; - for (i = 0; i < nr_regs; i++) { - mbox->ctx[i] = mbox_read_reg(mbox->parent, i * sizeof(u32)); - - dev_dbg(mbox->dev, "%s: [%02x] %08x\n", __func__, - i, mbox->ctx[i]); - } -} -EXPORT_SYMBOL(omap_mbox_save_ctx); - -void omap_mbox_restore_ctx(struct mbox_chan *chan) -{ - int i; - int nr_regs; - struct omap_mbox *mbox = mbox_chan_to_omap_mbox(chan); - - if (WARN_ON(!mbox)) - return; - - if (mbox->intr_type) - nr_regs = OMAP4_MBOX_NR_REGS; - else - nr_regs = MBOX_NR_REGS; - for (i = 0; i < nr_regs; i++) { - mbox_write_reg(mbox->parent, mbox->ctx[i], i * sizeof(u32)); - dev_dbg(mbox->dev, "%s: [%02x] %08x\n", __func__, - i, mbox->ctx[i]); - } -} -EXPORT_SYMBOL(omap_mbox_restore_ctx); - static void _omap_mbox_enable_irq(struct omap_mbox *mbox, omap_mbox_irq_t irq) { u32 l; diff --git a/include/linux/omap-mailbox.h b/include/linux/omap-mailbox.h index 587bbdd31f5a..c726bd833761 100644 --- a/include/linux/omap-mailbox.h +++ b/include/linux/omap-mailbox.h @@ -21,8 +21,6 @@ struct mbox_client; struct mbox_chan *omap_mbox_request_channel(struct mbox_client *cl, const char *chan_name); -void omap_mbox_save_ctx(struct mbox_chan *chan); -void omap_mbox_restore_ctx(struct mbox_chan *chan); void omap_mbox_enable_irq(struct mbox_chan *chan, omap_mbox_irq_t irq); void omap_mbox_disable_irq(struct mbox_chan *chan, omap_mbox_irq_t irq); -- cgit v1.2.3 From f0cdf76c103ffa34ca5ac87dcdef7edffc722cbf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 24 Apr 2016 21:38:14 +0200 Subject: net: remove NETDEV_TX_LOCKED support No more users in the tree, remove NETDEV_TX_LOCKED support. Adds another hole in softnet_stats struct, but better than keeping the unused collision counter around. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- Documentation/networking/netdev-features.txt | 10 ++++----- Documentation/networking/netdevices.txt | 9 +++----- include/linux/netdevice.h | 3 --- net/core/net-procfs.c | 3 ++- net/core/pktgen.c | 1 - net/sched/sch_generic.c | 32 ---------------------------- 6 files changed, 9 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index f310edec8a77..7413eb05223b 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -131,13 +131,11 @@ stack. Driver should not change behaviour based on them. * LLTX driver (deprecated for hardware drivers) -NETIF_F_LLTX should be set in drivers that implement their own locking in -transmit path or don't need locking at all (e.g. software tunnels). -In ndo_start_xmit, it is recommended to use a try_lock and return -NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly -protect against other callbacks (the rules you need to find out). +NETIF_F_LLTX is meant to be used by drivers that don't need locking at all, +e.g. software tunnels. -Don't use it for new drivers. +This is also used in a few legacy drivers that implement their +own locking, don't use it for new (hardware) drivers. * netns-local device diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 0b1cf6b2a592..7fec2061a334 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -69,10 +69,9 @@ ndo_start_xmit: When the driver sets NETIF_F_LLTX in dev->features this will be called without holding netif_tx_lock. In this case the driver - has to lock by itself when needed. It is recommended to use a try lock - for this and return NETDEV_TX_LOCKED when the spin lock fails. - The locking there should also properly protect against - set_rx_mode. Note that the use of NETIF_F_LLTX is deprecated. + has to lock by itself when needed. + The locking there should also properly protect against + set_rx_mode. WARNING: use of NETIF_F_LLTX is deprecated. Don't use it for new drivers. Context: Process with BHs disabled or BH (timer), @@ -83,8 +82,6 @@ ndo_start_xmit: o NETDEV_TX_BUSY Cannot transmit packet, try later Usually a bug, means queue start/stop flow control is broken in the driver. Note: the driver must NOT put the skb in its DMA ring. - o NETDEV_TX_LOCKED Locking failed, please retry quickly. - Only valid when NETIF_F_LLTX is set. ndo_tx_timeout: Synchronization: netif_tx_lock spinlock; all TX queues frozen. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1f6d5db471a2..18d8394f2e5d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -106,7 +106,6 @@ enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ NETDEV_TX_OK = 0x00, /* driver took care of packet */ NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; @@ -831,7 +830,6 @@ struct tc_to_netdev { * the queue before that can happen; it's for obsolete devices and weird * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. - * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, @@ -2737,7 +2735,6 @@ struct softnet_data { /* stats */ unsigned int processed; unsigned int time_squeeze; - unsigned int cpu_collision; unsigned int received_rps; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 2bf83299600a..14d09345f00d 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -162,7 +162,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v) "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps, flow_limit_count); + 0, /* was cpu_collision */ + sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 20999aa596dd..8604ae245960 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3472,7 +3472,6 @@ xmit_more: pkt_dev->odevname, ret); pkt_dev->errors++; /* fallthru */ - case NETDEV_TX_LOCKED: case NETDEV_TX_BUSY: /* Retry it next time */ atomic_dec(&(pkt_dev->skb->users)); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 80742edea96f..9c7756237904 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -108,35 +108,6 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, return skb; } -static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct netdev_queue *dev_queue, - struct Qdisc *q) -{ - int ret; - - if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { - /* - * Same CPU holding the lock. It may be a transient - * configuration error, when hard_start_xmit() recurses. We - * detect it by checking xmit owner and drop the packet when - * deadloop is detected. Return OK to try the next skb. - */ - kfree_skb_list(skb); - net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n", - dev_queue->dev->name); - ret = qdisc_qlen(q); - } else { - /* - * Another cpu is holding lock, requeue & delay xmits for - * some time. - */ - __this_cpu_inc(softnet_data.cpu_collision); - ret = dev_requeue_skb(skb, q); - } - - return ret; -} - /* * Transmit possibly several skbs, and handle the return status as * required. Holding the __QDISC___STATE_RUNNING bit guarantees that @@ -174,9 +145,6 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (dev_xmit_complete(ret)) { /* Driver sent out skb successfully or skb was consumed */ ret = qdisc_qlen(q); - } else if (ret == NETDEV_TX_LOCKED) { - /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, txq, q); } else { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely(ret != NETDEV_TX_BUSY)) -- cgit v1.2.3 From 9218b44dcc059e08e249f6f7614b8e391eb041d8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:47 +0300 Subject: net/mlx5e: Statistics handling refactoring Redesign ethtool statistics handling and reporting in the driver: 1. Move counters to a separate file (en_stats.h). 2. Remove unnecessary dependencies between stats and strings. 3. Use counter descriptors which hold a name and offset for each counter, and will be used to decide which counters will be exposed. For example when adding a new software counter to ethtool, instead of: 1. Add to stats struct. 2. Add to strings struct in the same order. 3. Change macro defining number of software counters. The only thing needed is to link the new counter to a counter descriptor. VPort counters are a set of hardware traffic counters created automatically for each virtual port opened. PPort counters are a set of counters describing per physical port performance statistics. These counters are gathered from hardware register and divided to groups according to different protocols. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 240 +--------------- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 132 ++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 232 +++++---------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 318 +++++++++++++++++++++ include/linux/mlx5/device.h | 1 + 5 files changed, 483 insertions(+), 440 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6e24e821a1d8..e903eff2574f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -46,6 +46,7 @@ #include #include "wq.h" #include "mlx5_core.h" +#include "en_stats.h" #define MLX5E_MAX_NUM_TC 8 @@ -148,245 +149,6 @@ struct mlx5e_umr_wqe { #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif -static const char vport_strings[][ETH_GSTRING_LEN] = { - /* vport statistics */ - "rx_packets", - "rx_bytes", - "tx_packets", - "tx_bytes", - "rx_error_packets", - "rx_error_bytes", - "tx_error_packets", - "tx_error_bytes", - "rx_unicast_packets", - "rx_unicast_bytes", - "tx_unicast_packets", - "tx_unicast_bytes", - "rx_multicast_packets", - "rx_multicast_bytes", - "tx_multicast_packets", - "tx_multicast_bytes", - "rx_broadcast_packets", - "rx_broadcast_bytes", - "tx_broadcast_packets", - "tx_broadcast_bytes", - - /* SW counters */ - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "lro_packets", - "lro_bytes", - "rx_csum_good", - "rx_csum_none", - "rx_csum_sw", - "tx_csum_offload", - "tx_csum_inner", - "tx_queue_stopped", - "tx_queue_wake", - "tx_queue_dropped", - "rx_wqe_err", - "rx_mpwqe_filler", - "rx_mpwqe_frag", - "rx_buff_alloc_err", -}; - -struct mlx5e_vport_stats { - /* HW counters */ - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - u64 rx_error_packets; - u64 rx_error_bytes; - u64 tx_error_packets; - u64 tx_error_bytes; - u64 rx_unicast_packets; - u64 rx_unicast_bytes; - u64 tx_unicast_packets; - u64 tx_unicast_bytes; - u64 rx_multicast_packets; - u64 rx_multicast_bytes; - u64 tx_multicast_packets; - u64 tx_multicast_bytes; - u64 rx_broadcast_packets; - u64 rx_broadcast_bytes; - u64 tx_broadcast_packets; - u64 tx_broadcast_bytes; - - /* SW counters */ - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 lro_packets; - u64 lro_bytes; - u64 rx_csum_good; - u64 rx_csum_none; - u64 rx_csum_sw; - u64 tx_csum_offload; - u64 tx_csum_inner; - u64 tx_queue_stopped; - u64 tx_queue_wake; - u64 tx_queue_dropped; - u64 rx_wqe_err; - u64 rx_mpwqe_filler; - u64 rx_mpwqe_frag; - u64 rx_buff_alloc_err; - -#define NUM_VPORT_COUNTERS 38 -}; - -static const char pport_strings[][ETH_GSTRING_LEN] = { - /* IEEE802.3 counters */ - "frames_tx", - "frames_rx", - "check_seq_err", - "alignment_err", - "octets_tx", - "octets_received", - "multicast_xmitted", - "broadcast_xmitted", - "multicast_rx", - "broadcast_rx", - "in_range_len_errors", - "out_of_range_len", - "too_long_errors", - "symbol_err", - "mac_control_tx", - "mac_control_rx", - "unsupported_op_rx", - "pause_ctrl_rx", - "pause_ctrl_tx", - - /* RFC2863 counters */ - "in_octets", - "in_ucast_pkts", - "in_discards", - "in_errors", - "in_unknown_protos", - "out_octets", - "out_ucast_pkts", - "out_discards", - "out_errors", - "in_multicast_pkts", - "in_broadcast_pkts", - "out_multicast_pkts", - "out_broadcast_pkts", - - /* RFC2819 counters */ - "drop_events", - "octets", - "pkts", - "broadcast_pkts", - "multicast_pkts", - "crc_align_errors", - "undersize_pkts", - "oversize_pkts", - "fragments", - "jabbers", - "collisions", - "p64octets", - "p65to127octets", - "p128to255octets", - "p256to511octets", - "p512to1023octets", - "p1024to1518octets", - "p1519to2047octets", - "p2048to4095octets", - "p4096to8191octets", - "p8192to10239octets", -}; - -#define NUM_IEEE_802_3_COUNTERS 19 -#define NUM_RFC_2863_COUNTERS 13 -#define NUM_RFC_2819_COUNTERS 21 -#define NUM_PPORT_COUNTERS (NUM_IEEE_802_3_COUNTERS + \ - NUM_RFC_2863_COUNTERS + \ - NUM_RFC_2819_COUNTERS) - -struct mlx5e_pport_stats { - __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS]; - __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS]; - __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; -}; - -static const char qcounter_stats_strings[][ETH_GSTRING_LEN] = { - "rx_out_of_buffer", -}; - -struct mlx5e_qcounter_stats { - u32 rx_out_of_buffer; -#define NUM_Q_COUNTERS 1 -}; - -static const char rq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "csum_none", - "csum_sw", - "lro_packets", - "lro_bytes", - "wqe_err", - "mpwqe_filler", - "mpwqe_frag", - "buff_alloc_err", -}; - -struct mlx5e_rq_stats { - u64 packets; - u64 bytes; - u64 csum_none; - u64 csum_sw; - u64 lro_packets; - u64 lro_bytes; - u64 wqe_err; - u64 mpwqe_filler; - u64 mpwqe_frag; - u64 buff_alloc_err; -#define NUM_RQ_STATS 10 -}; - -static const char sq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "csum_offload_inner", - "nop", - "csum_offload_none", - "stopped", - "wake", - "dropped", -}; - -struct mlx5e_sq_stats { - /* commonly accessed in data path */ - u64 packets; - u64 bytes; - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 csum_offload_inner; - u64 nop; - /* less likely accessed in data path */ - u64 csum_offload_none; - u64 stopped; - u64 wake; - u64 dropped; -#define NUM_SQ_STATS 12 -}; - -struct mlx5e_stats { - struct mlx5e_vport_stats vport; - struct mlx5e_pport_stats pport; - struct mlx5e_qcounter_stats qcnt; -}; - struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4077856aab76..f1649d543475 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -166,6 +166,12 @@ static const struct { }; #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) +#define MLX5E_NUM_RQ_STATS(priv) \ + (NUM_RQ_STATS * priv->params.num_channels * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_SQ_STATS(priv) \ + (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { @@ -173,21 +179,68 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + - priv->params.num_channels * NUM_RQ_STATS + - priv->params.num_channels * priv->params.num_tc * - NUM_SQ_STATS; + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + MLX5E_NUM_RQ_STATS(priv) + + MLX5E_NUM_SQ_STATS(priv); /* fallthrough */ default: return -EOPNOTSUPP; } } +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + int i, j, tc, idx = 0; + + /* SW counters */ + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name); + + /* Q counters */ + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name); + + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_stats_desc[i].name); + + /* PPORT counters */ + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_802_3_stats_desc[i].name); + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2863_stats_desc[i].name); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2819_stats_desc[i].name); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, + rq_stats_desc[j].name); + + for (tc = 0; tc < priv->params.num_tc; tc++) + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "tx%d_%s", + priv->channeltc_to_txq_map[i][tc], + sq_stats_desc[j].name); +} + static void mlx5e_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - int i, j, tc, idx = 0; struct mlx5e_priv *priv = netdev_priv(dev); switch (stringset) { @@ -198,35 +251,7 @@ static void mlx5e_get_strings(struct net_device *dev, break; case ETH_SS_STATS: - /* VPORT counters */ - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_strings[i]); - - /* Q counters */ - for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - qcounter_stats_strings[i]); - - /* PPORT counters */ - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_strings[i]); - - /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - "rx%d_%s", i, rq_stats_strings[j]); - - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + - (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_strings[j]); + mlx5e_fill_stats_strings(priv, data); break; } } @@ -245,28 +270,45 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = ((u64 *)&priv->stats.vport)[i]; + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_stats_desc, i); for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - data[idx++] = ((u32 *)&priv->stats.qcnt)[i]; + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->rq.stats)[j]; + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + rq_stats_desc, j); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + sq_stats_desc, j); } static void mlx5e_get_ringparam(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6270f8d539db..0c532367ff13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -91,96 +91,15 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_pport_stats *s = &priv->stats.pport; - u32 *in; - u32 *out; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - in = mlx5_vzalloc(sz); - out = mlx5_vzalloc(sz); - if (!in || !out) - goto free_out; - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->IEEE_802_3_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->IEEE_802_3_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2863_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2863_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2819_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2819_counters)); - -free_out: - kvfree(in); - kvfree(out); -} - -static void mlx5e_update_q_counter(struct mlx5e_priv *priv) -{ - struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; - - if (!priv->q_counter) - return; - - mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, - &qcnt->rx_out_of_buffer); -} - -void mlx5e_update_stats(struct mlx5e_priv *priv) +static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_sw_stats *s = &priv->stats.sw; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; - u32 *out; - int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u64 tx_offload_none; + u64 tx_offload_none = 0; int i, j; - out = mlx5_vzalloc(outlen); - if (!out) - return; - - /* Collect firts the SW counters and then HW for consistency */ - s->rx_packets = 0; - s->rx_bytes = 0; - s->tx_packets = 0; - s->tx_bytes = 0; - s->tso_packets = 0; - s->tso_bytes = 0; - s->tso_inner_packets = 0; - s->tso_inner_bytes = 0; - s->tx_queue_stopped = 0; - s->tx_queue_wake = 0; - s->tx_queue_dropped = 0; - s->tx_csum_inner = 0; - tx_offload_none = 0; - s->lro_packets = 0; - s->lro_bytes = 0; - s->rx_csum_none = 0; - s->rx_csum_sw = 0; - s->rx_wqe_err = 0; - s->rx_mpwqe_filler = 0; - s->rx_mpwqe_frag = 0; - s->rx_buff_alloc_err = 0; + memset(s, 0, sizeof(*s)); for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -212,7 +131,19 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) } } - /* HW counters */ + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; + s->rx_csum_good = s->rx_packets - s->rx_csum_none - + s->rx_csum_sw; +} + +static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + struct mlx5_core_dev *mdev = priv->mdev; + memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, @@ -222,58 +153,56 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) memset(out, 0, outlen); - if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + u32 *in; + + in = mlx5_vzalloc(sz); + if (!in) goto free_out; -#define MLX5_GET_CTR(p, x) \ - MLX5_GET64(query_vport_counter_out, p, x) - - s->rx_error_packets = - MLX5_GET_CTR(out, received_errors.packets); - s->rx_error_bytes = - MLX5_GET_CTR(out, received_errors.octets); - s->tx_error_packets = - MLX5_GET_CTR(out, transmit_errors.packets); - s->tx_error_bytes = - MLX5_GET_CTR(out, transmit_errors.octets); - - s->rx_unicast_packets = - MLX5_GET_CTR(out, received_eth_unicast.packets); - s->rx_unicast_bytes = - MLX5_GET_CTR(out, received_eth_unicast.octets); - s->tx_unicast_packets = - MLX5_GET_CTR(out, transmitted_eth_unicast.packets); - s->tx_unicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_unicast.octets); - - s->rx_multicast_packets = - MLX5_GET_CTR(out, received_eth_multicast.packets); - s->rx_multicast_bytes = - MLX5_GET_CTR(out, received_eth_multicast.octets); - s->tx_multicast_packets = - MLX5_GET_CTR(out, transmitted_eth_multicast.packets); - s->tx_multicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_multicast.octets); - - s->rx_broadcast_packets = - MLX5_GET_CTR(out, received_eth_broadcast.packets); - s->rx_broadcast_bytes = - MLX5_GET_CTR(out, received_eth_broadcast.octets); - s->tx_broadcast_packets = - MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); - s->tx_broadcast_bytes = - MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + MLX5_SET(ppcnt_reg, in, local_port, 1); - /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; - s->rx_csum_good = s->rx_packets - s->rx_csum_none - - s->rx_csum_sw; + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - mlx5e_update_pport_counters(priv); - mlx5e_update_q_counter(priv); + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); free_out: - kvfree(out); + kvfree(in); +} + +static void mlx5e_update_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + + if (!priv->q_counter) + return; + + mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, + &qcnt->rx_out_of_buffer); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_update_sw_counters(priv); + mlx5e_update_q_counter(priv); + mlx5e_update_vport_counters(priv); + mlx5e_update_pport_counters(priv); } static void mlx5e_update_stats_work(struct work_struct *work) @@ -2073,37 +2002,28 @@ static struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; - stats->rx_packets = vstats->rx_packets; - stats->rx_bytes = vstats->rx_bytes; - stats->tx_packets = vstats->tx_packets; - stats->tx_bytes = vstats->tx_bytes; - -#define PPCNT_GET_802_3_CTR(fld) \ - (MLX5_GET64(eth_802_3_cntrs_grp_data_layout, \ - pstats->IEEE_802_3_counters, fld##_high)) - -#define PPCNT_GET_2863_CTR(fld) \ - (MLX5_GET64(eth_2863_cntrs_grp_data_layout, \ - pstats->RFC_2863_counters, fld##_high)) + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; - stats->tx_dropped = vstats->tx_queue_dropped; + stats->tx_dropped = sstats->tx_queue_dropped; stats->rx_length_errors = - PPCNT_GET_802_3_CTR(a_in_range_length_errors) + - PPCNT_GET_802_3_CTR(a_out_of_range_length_field) + - PPCNT_GET_802_3_CTR(a_frame_too_long_errors); + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors); stats->rx_crc_errors = - PPCNT_GET_802_3_CTR(a_frame_check_sequence_errors); - stats->rx_frame_errors = - PPCNT_GET_802_3_CTR(a_alignment_errors); - stats->tx_aborted_errors = - PPCNT_GET_2863_CTR(if_out_discards); + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); stats->tx_carrier_errors = - PPCNT_GET_802_3_CTR(a_symbol_error_during_carrier); + PPORT_802_3_GET(pstats, a_symbol_error_during_carrier); stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + stats->rx_frame_errors; stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; @@ -2111,8 +2031,8 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) /* vport multicast also counts packets that are dropped due to steering * or rx out of buffer */ - stats->multicast = vstats->rx_multicast_packets; - + stats->multicast = + VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); return stats; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 000000000000..116320d8fc42 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) + +struct counter_desc { + char name[ETH_GSTRING_LEN]; + int offset; /* Byte offset */ +}; + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 lro_packets; + u64 lro_bytes; + u64 rx_csum_good; + u64 rx_csum_none; + u64 rx_csum_sw; + u64 tx_csum_offload; + u64 tx_csum_inner; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 rx_wqe_err; + u64 rx_mpwqe_filler; + u64 rx_mpwqe_frag; + u64 rx_buff_alloc_err; +}; + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; +}; + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +static const struct counter_desc vport_stats_desc[] = { + { "rx_error_packets", VPORT_COUNTER_OFF(received_errors.packets) }, + { "rx_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) }, + { "tx_error_packets", VPORT_COUNTER_OFF(transmit_errors.packets) }, + { "tx_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) }, + { "rx_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_unicast_bytes", VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, +}; + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +static const struct counter_desc pport_802_3_stats_desc[] = { + { "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) }, + { "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "alignment_err", PPORT_802_3_OFF(a_alignment_errors) }, + { "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "octets_received", PPORT_802_3_OFF(a_octets_received_ok) }, + { "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "unsupported_op_rx", + PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "pause_ctrl_tx", + PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +static const struct counter_desc pport_2863_stats_desc[] = { + { "in_octets", PPORT_2863_OFF(if_in_octets) }, + { "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) }, + { "in_discards", PPORT_2863_OFF(if_in_discards) }, + { "in_errors", PPORT_2863_OFF(if_in_errors) }, + { "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) }, + { "out_octets", PPORT_2863_OFF(if_out_octets) }, + { "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) }, + { "out_discards", PPORT_2863_OFF(if_out_discards) }, + { "out_errors", PPORT_2863_OFF(if_out_errors) }, + { "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) }, + { "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) }, + { "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) }, + { "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) }, +}; + +static const struct counter_desc pport_2819_stats_desc[] = { + { "drop_events", PPORT_2819_OFF(ether_stats_drop_events) }, + { "octets", PPORT_2819_OFF(ether_stats_octets) }, + { "pkts", PPORT_2819_OFF(ether_stats_pkts) }, + { "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) }, + { "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) }, + { "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) }, + { "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) }, + { "fragments", PPORT_2819_OFF(ether_stats_fragments) }, + { "jabbers", PPORT_2819_OFF(ether_stats_jabbers) }, + { "collisions", PPORT_2819_OFF(ether_stats_collisions) }, + { "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "p1024to1518octets", + PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "p1519to2047octets", + PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "p2048to4095octets", + PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "p4096to8191octets", + PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "p8192to10239octets", + PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_none; + u64 csum_sw; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; + u64 mpwqe_filler; + u64 mpwqe_frag; + u64 buff_alloc_err; +}; + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_offload_inner; + u64 nop; + /* less likely accessed in data path */ + u64 csum_offload_none; + u64 stopped; + u64 wake; + u64 dropped; +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) +#define NUM_PPORT_COUNTERS (NUM_PPORT_802_3_COUNTERS + \ + NUM_PPORT_2863_COUNTERS + \ + NUM_PPORT_2819_COUNTERS) +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; +}; + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 03f8d719b680..8be44ca777ed 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -59,6 +59,7 @@ #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -- cgit v1.2.3 From 121fcdc84d8240d4dfe1f737befd5814b12623ee Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:50 +0300 Subject: net/mlx5e: Add link down events counter Expose link_down_events counter through ethtool -S. This counter is read from PPort statistics, then proccessed and stored as a special handling software counter. This counter is stored along software counters since it is the only PPort counter that it's size is not 64 bits. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 5 +++++ include/linux/mlx5/device.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ef66ba65f5cd..61e261c3d247 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -135,6 +135,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; s->rx_csum_good = s->rx_packets - s->rx_csum_none - s->rx_csum_sw; + + s->link_down_events = MLX5_GET(ppcnt_reg, + priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); } static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) @@ -183,6 +187,10 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { out = pstats->per_prio_counters[prio]; @@ -208,10 +216,10 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv) void mlx5e_update_stats(struct mlx5e_priv *priv) { - mlx5e_update_sw_counters(priv); mlx5e_update_q_counter(priv); mlx5e_update_vport_counters(priv); mlx5e_update_pport_counters(priv); + mlx5e_update_sw_counters(priv); } static void mlx5e_update_stats_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index de27eeaa3bd2..7cd8cb44b2ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,9 @@ struct mlx5e_sw_stats { u64 rx_mpwqe_filler; u64 rx_mpwqe_frag; u64 rx_buff_alloc_err; + + /* Special handling counters */ + u64 link_down_events; }; static const struct counter_desc sw_stats_desc[] = { @@ -96,6 +99,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) }, }; struct mlx5e_qcounter_stats { @@ -178,6 +182,7 @@ struct mlx5e_pport_stats { __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; static const struct counter_desc pport_802_3_stats_desc[] = { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8be44ca777ed..942bccacd7b7 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1369,6 +1369,7 @@ enum { MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, + MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; -- cgit v1.2.3 From 94cb1ebbafd509210887eea6ced55c40da7b4baa Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 24 Apr 2016 22:51:52 +0300 Subject: net/mlx5e: Add support for RXALL netdev feature Introduce new access register named Ports Check Mask Register (PCMR) to control all HW checks on port. With this register, the driver can enable/disable Hardware FCS validation. When RXALL is enabled/disabled using ndo_set_features, enable/disable fcs check at HW. User can change HW configuration using rx-all flag at ethtool. Signed-off-by: Eran Ben Elisha Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 20 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 49 +++++++++++++++++++++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/port.h | 4 ++ 4 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d82bc6b697f9..ad0cb4aa593b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2139,6 +2139,14 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) return 0; } +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t wanted_features, netdev_features_t feature, @@ -2174,6 +2182,8 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_vlan_filter); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, set_feature_tc_num_filters); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, + set_feature_rx_all); return err ? -EINVAL : 0; } @@ -2564,6 +2574,8 @@ static void mlx5e_build_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + bool fcs_supported; + bool fcs_enabled; SET_NETDEV_DEV(netdev, &mdev->pdev->dev); @@ -2607,10 +2619,18 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + netdev->features = netdev->hw_features; if (!priv->params.lro_en) netdev->features &= ~NETIF_F_LRO; + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index ae378c575deb..c37740f30fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -607,3 +607,52 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) return err; } EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcd5ac8d3b14..497a4dbd91b0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -112,6 +112,7 @@ enum { MLX5_REG_PMPE = 0x5010, MLX5_REG_PELC = 0x500e, MLX5_REG_PVLC = 0x500f, + MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a1d145abd4eb..577e953d0aa7 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -84,4 +84,8 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled); + #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From da54d24ec3ef736de04c61a01653776a9750334f Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:53 +0300 Subject: net/mlx5e: Add ethtool support for interface identify (LED blinking) Add the needed hardware command and mlx5_ifc structs for managing LED control. Add set_phys_id ethtool callback to support ethtool -p flag. Signed-off-by: Gal Pressman Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 25 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 13 +++++++++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/port.h | 6 ++++++ 4 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 522d584bc05f..a2c444ec191b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1135,6 +1135,30 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1159,6 +1183,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, .get_wol = mlx5e_get_wol, .set_wol = mlx5e_set_wol, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index c37740f30fbe..446549f63b5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -115,6 +115,19 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, } EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + u32 in[MLX5_ST_SZ_DW(mlcr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 497a4dbd91b0..2e8758d1b19e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -116,6 +116,7 @@ enum { MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MLCR = 0x902b, }; enum { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 577e953d0aa7..a364ab1737a0 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -35,6 +35,11 @@ #include +enum mlx5_beacon_duration { + MLX5_BEACON_DURATION_OFF = 0x0, + MLX5_BEACON_DURATION_INF = 0xffff, +}; + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -53,6 +58,7 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -- cgit v1.2.3 From bb64143eee8c036a89b31daa4e9bf8360a8bded1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:54 +0300 Subject: net/mlx5e: Add ethtool support for dump module EEPROM Add query MCIA, PMLP registers infrastructure and commands. Add ethtool support for get_module_info() and get_module_eeprom() callbacks. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 80 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 76 ++++++++++++++++++++ include/linux/mlx5/driver.h | 3 +- include/linux/mlx5/port.h | 15 ++++ 4 files changed, 173 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a2c444ec191b..0518c8658507 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1159,6 +1159,84 @@ static int mlx5e_set_phys_id(struct net_device *dev, return mlx5_set_port_beacon(mdev, beacon_duration); } +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4]; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return 0; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1186,4 +1264,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_phys_id = mlx5e_set_phys_id, .get_wol = mlx5e_get_wol, .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 446549f63b5c..4cb2a44510fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -310,6 +310,82 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); +static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)]; + int module_mapping; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmlp_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping); + *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK; + + return 0; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + u32 in[MLX5_ST_SZ_DW(mcia_reg)]; + int module_num; + u16 i2c_addr; + int status; + int err; + void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + err = mlx5_query_module_num(dev, &module_num); + if (err) + return err; + + memset(in, 0, sizeof(in)); + size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + + if (offset < MLX5_EEPROM_PAGE_LENGTH && + offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + + i2c_addr = MLX5_I2C_ADDR_LOW; + if (offset >= MLX5_EEPROM_PAGE_LENGTH) { + i2c_addr = MLX5_I2C_ADDR_HIGH; + offset -= MLX5_EEPROM_PAGE_LENGTH; + } + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, device_address, offset); + MLX5_SET(mcia_reg, in, size, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + memcpy(data, ptr, size); + + return size; +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2e8758d1b19e..1a170672c656 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -113,9 +113,10 @@ enum { MLX5_REG_PELC = 0x500e, MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, - MLX5_REG_PMLP = 0, /* TBD */ + MLX5_REG_PMLP = 0x5002, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a364ab1737a0..7391eb833253 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -40,6 +40,19 @@ enum mlx5_beacon_duration { MLX5_BEACON_DURATION_INF = 0xffff, }; +enum mlx5_module_id { + MLX5_MODULE_ID_SFP = 0x3, + MLX5_MODULE_ID_QSFP = 0xC, + MLX5_MODULE_ID_QSFP_PLUS = 0xD, + MLX5_MODULE_ID_QSFP28 = 0x11, +}; + +#define MLX5_EEPROM_MAX_BYTES 32 +#define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff +#define MLX5_I2C_ADDR_LOW 0x50 +#define MLX5_I2C_ADDR_HIGH 0x51 +#define MLX5_EEPROM_PAGE_LENGTH 256 + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -93,5 +106,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, bool *enabled); +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data); #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From 363501145e3faa650193722fe7047b767ed87172 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:55 +0300 Subject: net/mlx5e: Add ethtool support for rxvlan-offload (vlan stripping) Use ethtool -K rxvlan to enable/disable C-TAG vlan stripping by hardware. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 74 ++++++++++++++++++++++- include/linux/mlx5/driver.h | 4 ++ 3 files changed, 78 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e903eff2574f..8abc289ac1fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -166,6 +166,7 @@ struct mlx5e_params { u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + bool vlan_strip_disable; #ifdef CONFIG_MLX5_CORE_EN_DCB struct ieee_ets ets; #endif @@ -575,6 +576,8 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); + int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ad0cb4aa593b..6c9c10c131a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -388,6 +388,7 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -402,7 +403,8 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) return err; } -static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, + int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -430,6 +432,36 @@ static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) return err; } +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + static void mlx5e_disable_rq(struct mlx5e_rq *rq) { mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); @@ -468,7 +500,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; - err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; @@ -493,7 +525,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); while (!mlx5_wq_ll_is_empty(&rq->wq)) msleep(20); @@ -1963,6 +1995,23 @@ static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv, i); } +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) +{ + int err = 0; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); + if (err) + return err; + } + + return 0; +} + static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -2147,6 +2196,23 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + priv->params.vlan_strip_disable = !enable; + err = mlx5e_modify_rqs_vsd(priv, !enable); + if (err) + priv->params.vlan_strip_disable = enable; + + mutex_unlock(&priv->state_lock); + + return err; +} + static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t wanted_features, netdev_features_t feature, @@ -2184,6 +2250,8 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, + set_feature_rx_vlan); return err ? -EINVAL : 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1a170672c656..2cc5e9fd5913 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -45,6 +45,10 @@ #include #include +enum { + MLX5_RQ_BITMASK_VSD = 1 << 1, +}; + enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, -- cgit v1.2.3 From 1b223dd391622fde05e03829d813c3c6cc998685 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 24 Apr 2016 22:51:56 +0300 Subject: net/mlx5e: Fix checksum handling for non-stripped vlan packets Now as rx-vlan offload can be disabled, packets can be received with vlan tag not stripped, which means is_first_ethertype_ip will return false, for that we need to check if the hardware reported csum OK so we will report CHECKSUM_UNNECESSARY for those packets. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 20 +++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 8 ++++++-- include/linux/mlx5/device.h | 21 ++++++++++++++++----- 4 files changed, 38 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6c9c10c131a0..5bad17d37d7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -109,6 +109,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_sw += rq_stats->csum_sw; + s->rx_csum_inner += rq_stats->csum_inner; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_mpwqe_frag += rq_stats->mpwqe_frag; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 918b7c7fd74f..23adfe2fcba9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -543,16 +543,26 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (likely(is_first_ethertype_ip(skb))) { + return; + } + + if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); rq->stats.csum_sw++; - } else { - goto csum_none; + return; } - return; - + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + rq->stats.csum_inner++; + } + return; + } csum_none: skb->ip_summed = CHECKSUM_NONE; rq->stats.csum_none++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 7cd8cb44b2ab..115752b53d85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -62,6 +62,7 @@ struct mlx5e_sw_stats { u64 rx_csum_good; u64 rx_csum_none; u64 rx_csum_sw; + u64 rx_csum_inner; u64 tx_csum_offload; u64 tx_csum_inner; u64 tx_queue_stopped; @@ -90,6 +91,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -272,8 +274,9 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { struct mlx5e_rq_stats { u64 packets; u64 bytes; - u64 csum_none; u64 csum_sw; + u64 csum_inner; + u64 csum_none; u64 lro_packets; u64 lro_bytes; u64 wqe_err; @@ -285,8 +288,9 @@ struct mlx5e_rq_stats { static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) }, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 942bccacd7b7..6bd429b53b77 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -645,8 +645,9 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[2]; - __be16 wqe_id; + u8 outer_l3_tunneled; + u8 rsvd0; + __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; @@ -659,7 +660,7 @@ struct mlx5_cqe64 { __be16 slid; __be32 flags_rqpn; u8 hds_ip_ext; - u8 l4_hdr_type_etc; + u8 l4_l3_hdr_type; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; @@ -680,12 +681,22 @@ static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) { - return (cqe->l4_hdr_type_etc >> 4) & 0x7; + return (cqe->l4_l3_hdr_type >> 4) & 0x7; +} + +static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe) +{ + return (cqe->l4_l3_hdr_type >> 2) & 0x3; +} + +static inline u8 cqe_is_tunneled(struct mlx5_cqe64 *cqe) +{ + return cqe->outer_l3_tunneled & 0x1; } static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) { - return !!(cqe->l4_hdr_type_etc & 0x1); + return !!(cqe->l4_l3_hdr_type & 0x1); } static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) -- cgit v1.2.3 From db0a6fb5d97afe01fd9c47d37c6daa82d4d4001d Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 21 Apr 2016 14:14:01 -0400 Subject: audit: add tty field to LOGIN event The tty field was missing from AUDIT_LOGIN events. Refactor code to create a new function audit_get_tty(), using it to replace the call in audit_log_task_info() and to add it to audit_log_set_loginuid(). Lock and bump the kref to protect it, adding audit_put_tty() alias to decrement it. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 24 ++++++++++++++++++++++++ kernel/audit.c | 18 +++++------------- kernel/auditsc.c | 8 ++++++-- 3 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b40ed5df5542..32cdafb312d8 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -26,6 +26,7 @@ #include #include #include +#include #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -343,6 +344,23 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid; } +static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) +{ + struct tty_struct *tty = NULL; + unsigned long flags; + + spin_lock_irqsave(&tsk->sighand->siglock, flags); + if (tsk->signal) + tty = tty_kref_get(tsk->signal->tty); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + return tty; +} + +static inline void audit_put_tty(struct tty_struct *tty) +{ + tty_kref_put(tty); +} + extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); @@ -500,6 +518,12 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return -1; } +static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) +{ + return NULL; +} +static inline void audit_put_tty(struct tty_struct *tty) +{ } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, diff --git a/kernel/audit.c b/kernel/audit.c index f52fbefede09..384374a1d232 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -64,7 +64,6 @@ #include #endif #include -#include #include #include @@ -1871,21 +1870,14 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; char comm[sizeof(tsk->comm)]; - char *tty; + struct tty_struct *tty; if (!ab) return; /* tsk == current */ cred = current_cred(); - - spin_lock_irq(&tsk->sighand->siglock); - if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) - tty = tsk->signal->tty->name; - else - tty = "(none)"; - spin_unlock_irq(&tsk->sighand->siglock); - + tty = audit_get_tty(tsk); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" @@ -1901,11 +1893,11 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), - tty, audit_get_sessionid(tsk)); - + tty ? tty_name(tty) : "(none)", + audit_get_sessionid(tsk)); + audit_put_tty(tty); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - audit_log_d_path_exe(ab, tsk->mm); audit_log_task_context(ab); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 195ffaee50b9..71e14d836e69 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1980,6 +1980,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, { struct audit_buffer *ab; uid_t uid, oldloginuid, loginuid; + struct tty_struct *tty; if (!audit_enabled) return; @@ -1987,14 +1988,17 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid), + tty = audit_get_tty(current); ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid); audit_log_task_context(ab); - audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d", - oldloginuid, loginuid, oldsessionid, sessionid, !rc); + audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", + oldsessionid, sessionid, !rc); + audit_put_tty(tty); audit_log_end(ab); } -- cgit v1.2.3 From dd80b54b18db3d76a43558daaa6ea3aa67f5aacd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 20 Apr 2016 15:39:11 +0200 Subject: USB: LTM also for USB 3.1 LTM is also defined for SS+. The correct test is to check for anything slower than SS not exactly SS. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 6a9a0c28415d..29aba76017ee 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -720,7 +720,7 @@ extern void usb_enable_ltm(struct usb_device *udev); static inline bool usb_device_supports_ltm(struct usb_device *udev) { - if (udev->speed != USB_SPEED_SUPER || !udev->bos || !udev->bos->ss_cap) + if (udev->speed < USB_SPEED_SUPER || !udev->bos || !udev->bos->ss_cap) return false; return udev->bos->ss_cap->bmAttributes & USB_LTM_SUPPORT; } -- cgit v1.2.3 From fca504f6054f2a62d966afde23a0cfbf9e3dc32f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 20 Apr 2016 15:39:12 +0200 Subject: USB: correct intervals for SS+ SS+ also expresses intervals in units of 125ms. Testing must be for SS or faster, not SS exactly. Signed-off-by: Oliver neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 29aba76017ee..7824f4557d50 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1569,7 +1569,7 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * Initializes a interrupt urb with the proper information needed to submit * it to a device. * - * Note that High Speed and SuperSpeed interrupt endpoints use a logarithmic + * Note that High Speed and SuperSpeed(+) interrupt endpoints use a logarithmic * encoding of the endpoint interval, and express polling intervals in * microframes (eight per millisecond) rather than in frames (one per * millisecond). @@ -1595,7 +1595,7 @@ static inline void usb_fill_int_urb(struct urb *urb, urb->complete = complete_fn; urb->context = context; - if (dev->speed == USB_SPEED_HIGH || dev->speed == USB_SPEED_SUPER) { + if (dev->speed == USB_SPEED_HIGH || dev->speed >= USB_SPEED_SUPER) { /* make sure interval is within allowed range */ interval = clamp(interval, 1, 16); -- cgit v1.2.3 From dad56ee742a3abbb5d9e8108f8537d412bff3f57 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 26 Apr 2016 21:22:22 -0400 Subject: tracing: Move event_trigger_unlock_commit{_regs}() to local header The functions event_trigger_unlock_commit() and event_trigger_unlock_commit_regs() are no longer used outside the tracing system. Move them out of the generic headers and into the local one. Along with __event_trigger_test_discard() that is only used by them. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 94 -------------------------------------------- kernel/trace/trace.h | 94 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5f89a5b0c7e6..70a181cb3585 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -452,100 +452,6 @@ trace_trigger_soft_disabled(struct trace_event_file *file) return false; } -/* - * Helper function for event_trigger_unlock_commit{_regs}(). - * If there are event triggers attached to this event that requires - * filtering against its fields, then they wil be called as the - * entry already holds the field information of the current event. - * - * It also checks if the event should be discarded or not. - * It is to be discarded if the event is soft disabled and the - * event was only recorded to process triggers, or if the event - * filter is active and this event did not match the filters. - * - * Returns true if the event is discarded, false otherwise. - */ -static inline bool -__event_trigger_test_discard(struct trace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, - enum event_trigger_type *tt) -{ - unsigned long eflags = file->flags; - - if (eflags & EVENT_FILE_FL_TRIGGER_COND) - *tt = event_triggers_call(file, entry); - - if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) - ring_buffer_discard_commit(buffer, event); - else if (!filter_check_discard(file, entry, buffer, event)) - return false; - - return true; -} - -/** - * event_trigger_unlock_commit - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - */ -static inline void -event_trigger_unlock_commit(struct trace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); - - if (tt) - event_triggers_post_call(file, tt, entry); -} - -/** - * event_trigger_unlock_commit_regs - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - * - * Same as event_trigger_unlock_commit() but calls - * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). - */ -static inline void -event_trigger_unlock_commit_regs(struct trace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, - struct pt_regs *regs) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(file->tr, buffer, event, - irq_flags, pc, regs); - - if (tt) - event_triggers_post_call(file, tt, entry); -} - #ifdef CONFIG_BPF_EVENTS unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); #else diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 727a3d28bce5..c0eac7b1e5a6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1065,6 +1065,100 @@ struct trace_subsystem_dir { int nr_events; }; +/* + * Helper function for event_trigger_unlock_commit{_regs}(). + * If there are event triggers attached to this event that requires + * filtering against its fields, then they wil be called as the + * entry already holds the field information of the current event. + * + * It also checks if the event should be discarded or not. + * It is to be discarded if the event is soft disabled and the + * event was only recorded to process triggers, or if the event + * filter is active and this event did not match the filters. + * + * Returns true if the event is discarded, false otherwise. + */ +static inline bool +__event_trigger_test_discard(struct trace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, + enum event_trigger_type *tt) +{ + unsigned long eflags = file->flags; + + if (eflags & EVENT_FILE_FL_TRIGGER_COND) + *tt = event_triggers_call(file, entry); + + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) + ring_buffer_discard_commit(buffer, event); + else if (!filter_check_discard(file, entry, buffer, event)) + return false; + + return true; +} + +/** + * event_trigger_unlock_commit - handle triggers and finish event commit + * @file: The file pointer assoctiated to the event + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself + * @irq_flags: The state of the interrupts at the start of the event + * @pc: The state of the preempt count at the start of the event. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and + * if the event is soft disabled and should be discarded. + */ +static inline void +event_trigger_unlock_commit(struct trace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, unsigned long irq_flags, int pc) +{ + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); + + if (tt) + event_triggers_post_call(file, tt, entry); +} + +/** + * event_trigger_unlock_commit_regs - handle triggers and finish event commit + * @file: The file pointer assoctiated to the event + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself + * @irq_flags: The state of the interrupts at the start of the event + * @pc: The state of the preempt count at the start of the event. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and + * if the event is soft disabled and should be discarded. + * + * Same as event_trigger_unlock_commit() but calls + * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). + */ +static inline void +event_trigger_unlock_commit_regs(struct trace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, unsigned long irq_flags, int pc, + struct pt_regs *regs) +{ + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit_regs(file->tr, buffer, event, + irq_flags, pc, regs); + + if (tt) + event_triggers_post_call(file, tt, entry); +} + #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) -- cgit v1.2.3 From da20dfe6b50ea4c1a82797b7ee8655a370535d73 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Apr 2016 12:53:29 -0700 Subject: fs: fix over-zealous use of "const" When I was fixing up const recommendations from checkpatch.pl, I went overboard. This fixes the warning (during a W=1 build): include/linux/fs.h:2627:74: warning: type qualifiers ignored on function return type [-Wignored-qualifiers] static inline const char * const kernel_read_file_id_str(enum kernel_read_file_id id) Reported-by: Andy Shevchenko Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 90477550b935..9847d5c49a0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2600,7 +2600,7 @@ static const char * const kernel_read_file_str[] = { __kernel_read_file_id(__fid_stringify) }; -static inline const char * const kernel_read_file_id_str(enum kernel_read_file_id id) +static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { if (id < 0 || id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; -- cgit v1.2.3 From c5dfd78eb79851e278b7973031b9ca363da87a7e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Apr 2016 12:28:50 -0300 Subject: perf core: Allow setting up max frame stack depth via sysctl The default remains 127, which is good for most cases, and not even hit most of the time, but then for some cases, as reported by Brendan, 1024+ deep frames are appearing on the radar for things like groovy, ruby. And in some workloads putting a _lower_ cap on this may make sense. One that is per event still needs to be put in place tho. The new file is: # cat /proc/sys/kernel/perf_event_max_stack 127 Chaging it: # echo 256 > /proc/sys/kernel/perf_event_max_stack # cat /proc/sys/kernel/perf_event_max_stack 256 But as soon as there is some event using callchains we get: # echo 512 > /proc/sys/kernel/perf_event_max_stack -bash: echo: write error: Device or resource busy # Because we only allocate the callchain percpu data structures when there is a user, which allows for changing the max easily, its just a matter of having no callchain users at that point. Reported-and-Tested-by: Brendan Gregg Reviewed-by: Frederic Weisbecker Acked-by: Alexei Starovoitov Acked-by: David Ahern Cc: Adrian Hunter Cc: Alexander Shishkin Cc: He Kuang Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/r/20160426002928.GB16708@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/sysctl/kernel.txt | 14 ++++++++++++++ arch/arm/kernel/perf_callchain.c | 2 +- arch/arm64/kernel/perf_callchain.c | 4 ++-- arch/metag/kernel/perf_callchain.c | 2 +- arch/mips/kernel/perf_event.c | 4 ++-- arch/powerpc/perf/callchain.c | 4 ++-- arch/sparc/kernel/perf_event.c | 6 +++--- arch/x86/events/core.c | 4 ++-- arch/xtensa/kernel/perf_event.c | 4 ++-- include/linux/perf_event.h | 8 ++++++-- kernel/bpf/stackmap.c | 8 ++++---- kernel/events/callchain.c | 35 +++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 12 ++++++++++++ 13 files changed, 84 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 57653a44b128..260cde08e92e 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -60,6 +60,7 @@ show up in /proc/sys/kernel: - panic_on_warn - perf_cpu_time_max_percent - perf_event_paranoid +- perf_event_max_stack - pid_max - powersave-nap [ PPC only ] - printk @@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN). The default value is 1. ============================================================== +perf_event_max_stack: + +Controls maximum number of stack frames to copy for (attr.sample_type & +PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using +'perf record -g' or 'perf trace --call-graph fp'. + +This can only be done when no events are in use that have callchains +enabled, otherwise writing to this file will return -EBUSY. + +The default value is 127. + +============================================================== + pid_max: PID allocation wrap value. When the kernel's next PID value diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 4e02ae5950ff..27563befa8a2 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) tail = (struct frame_tail __user *)regs->ARM_fp - 1; - while ((entry->nr < PERF_MAX_STACK_DEPTH) && + while ((entry->nr < sysctl_perf_event_max_stack) && tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); } diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index ff4665462a02..32c3c6e70119 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry, tail = (struct frame_tail __user *)regs->regs[29]; - while (entry->nr < PERF_MAX_STACK_DEPTH && + while (entry->nr < sysctl_perf_event_max_stack && tail && !((unsigned long)tail & 0xf)) tail = user_backtrace(tail, entry); } else { @@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry, tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - while ((entry->nr < PERF_MAX_STACK_DEPTH) && + while ((entry->nr < sysctl_perf_event_max_stack) && tail && !((unsigned long)tail & 0x3)) tail = compat_user_backtrace(tail, entry); #endif diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c index 315633461a94..252abc12a5a3 100644 --- a/arch/metag/kernel/perf_callchain.c +++ b/arch/metag/kernel/perf_callchain.c @@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) --frame; - while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame) + while ((entry->nr < sysctl_perf_event_max_stack) && frame) frame = user_backtrace(frame, entry); } diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index c1cf9c6c3f77..5021c546ad07 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry, addr = *sp++; if (__kernel_text_address(addr)) { perf_callchain_store(entry, addr); - if (entry->nr >= PERF_MAX_STACK_DEPTH) + if (entry->nr >= sysctl_perf_event_max_stack) break; } } @@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, } do { perf_callchain_store(entry, pc); - if (entry->nr >= PERF_MAX_STACK_DEPTH) + if (entry->nr >= sysctl_perf_event_max_stack) break; pc = unwind_stack(current, &sp, pc, &ra); } while (pc); diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index e04a6752b399..22d9015c1acc 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { fp = (unsigned long __user *) sp; if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) return; @@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { fp = (unsigned int __user *) (unsigned long) sp; if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) return; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6596f66ce112..a4b8b5aed21c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, } } #endif - } while (entry->nr < PERF_MAX_STACK_DEPTH); + } while (entry->nr < sysctl_perf_event_max_stack); } static inline int @@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; perf_callchain_store(entry, pc); - } while (entry->nr < PERF_MAX_STACK_DEPTH); + } while (entry->nr < sysctl_perf_event_max_stack); } static void perf_callchain_user_32(struct perf_callchain_entry *entry, @@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, ufp = (unsigned long)sf.fp; } perf_callchain_store(entry, pc); - } while (entry->nr < PERF_MAX_STACK_DEPTH); + } while (entry->nr < sysctl_perf_event_max_stack); } void diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 041e442a3e28..41d93d0e972b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2277,7 +2277,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) fp = compat_ptr(ss_base + regs->bp); pagefault_disable(); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { unsigned long bytes; frame.next_frame = 0; frame.return_address = 0; @@ -2337,7 +2337,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; pagefault_disable(); - while (entry->nr < PERF_MAX_STACK_DEPTH) { + while (entry->nr < sysctl_perf_event_max_stack) { unsigned long bytes; frame.next_frame = NULL; frame.return_address = 0; diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 54f01188c29c..a6b00b3af429 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data) void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH, + xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack, callchain_trace, NULL, entry); } void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH, + xtensa_backtrace_user(regs, sysctl_perf_event_max_stack, callchain_trace, entry); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 85749ae8cb5f..a090700cccca 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -58,7 +58,7 @@ struct perf_guest_info_callbacks { struct perf_callchain_entry { __u64 nr; - __u64 ip[PERF_MAX_STACK_DEPTH]; + __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_raw_record { @@ -993,9 +993,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); +extern int sysctl_perf_event_max_stack; + static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { - if (entry->nr < PERF_MAX_STACK_DEPTH) { + if (entry->nr < sysctl_perf_event_max_stack) { entry->ip[entry->nr++] = ip; return 0; } else { @@ -1017,6 +1019,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +int perf_event_max_stack_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); static inline bool perf_paranoid_tracepoint_raw(void) { diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 499d9e933f8e..f5a19548be12 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || value_size < 8 || value_size % 8 || - value_size / 8 > PERF_MAX_STACK_DEPTH) + value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); /* hash table size must be power of 2 */ @@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; u32 max_depth = map->value_size / 8; - /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */ - u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth; + /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ + u32 init_nr = sysctl_perf_event_max_stack - max_depth; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len; bool user = flags & BPF_F_USER_STACK; @@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) return -EFAULT; /* get_perf_callchain() guarantees that trace->nr >= init_nr - * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth + * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth */ trace_nr = trace->nr - init_nr; diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 343c22f5e867..b9325e7dcba1 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -18,6 +18,14 @@ struct callchain_cpus_entries { struct perf_callchain_entry *cpu_entries[0]; }; +int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH; + +static inline size_t perf_callchain_entry__sizeof(void) +{ + return (sizeof(struct perf_callchain_entry) + + sizeof(__u64) * sysctl_perf_event_max_stack); +} + static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); static atomic_t nr_callchain_events; static DEFINE_MUTEX(callchain_mutex); @@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void) if (!entries) return -ENOMEM; - size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; + size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS; for_each_possible_cpu(cpu) { entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, @@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) cpu = smp_processor_id(); - return &entries->cpu_entries[cpu][*rctx]; + return (((void *)entries->cpu_entries[cpu]) + + (*rctx * perf_callchain_entry__sizeof())); } static void @@ -215,3 +224,25 @@ exit_put: return entry; } + +int perf_event_max_stack_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int new_value = sysctl_perf_event_max_stack, ret; + struct ctl_table new_table = *table; + + new_table.data = &new_value; + ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + mutex_lock(&callchain_mutex); + if (atomic_read(&nr_callchain_events)) + ret = -EBUSY; + else + sysctl_perf_event_max_stack = new_value; + + mutex_unlock(&callchain_mutex); + + return ret; +} diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 725587f10667..c8b318663525 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -130,6 +130,9 @@ static int one_thousand = 1000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif +#ifdef CONFIG_PERF_EVENTS +static int six_hundred_forty_kb = 640 * 1024; +#endif /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .procname = "perf_event_max_stack", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(sysctl_perf_event_max_stack), + .mode = 0644, + .proc_handler = perf_event_max_stack_handler, + .extra1 = &zero, + .extra2 = &six_hundred_forty_kb, + }, #endif #ifdef CONFIG_KMEMCHECK { -- cgit v1.2.3 From 65da9a0a3bf2202c2432f42d41eb908f2fa30579 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 27 Apr 2016 10:13:46 -0400 Subject: tracing: Make filter_check_discard() local Nothing outside of the tracing directory calls filter_check_discard() or check_filter_check_discard(). They should not be called by modules. Move their prototypes into the local tracing header and remove their EXPORT_SYMBOL() macros. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 6 ------ kernel/trace/trace.c | 2 -- kernel/trace/trace.h | 6 ++++++ 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70a181cb3585..bb383af35cc7 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -413,12 +413,6 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); -extern int filter_check_discard(struct trace_event_file *file, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event); -extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event); extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, void *rec); extern void event_triggers_post_call(struct trace_event_file *file, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 46028d47d252..02f5a5f51d49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -318,7 +318,6 @@ int filter_check_discard(struct trace_event_file *file, void *rec, return 0; } -EXPORT_SYMBOL_GPL(filter_check_discard); int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, @@ -332,7 +331,6 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec, return 0; } -EXPORT_SYMBOL_GPL(call_filter_check_discard); static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c0eac7b1e5a6..ee8691c66bfe 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1065,6 +1065,12 @@ struct trace_subsystem_dir { int nr_events; }; +extern int filter_check_discard(struct trace_event_file *file, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event); +extern int call_filter_check_discard(struct trace_event_call *call, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event); /* * Helper function for event_trigger_unlock_commit{_regs}(). * If there are event triggers attached to this event that requires -- cgit v1.2.3 From 188e3c5cd2b672620291e64a21f1598fe91e40b6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Apr 2016 11:56:04 +0200 Subject: tty: provide tty_name() even without CONFIG_TTY The audit subsystem just started printing the name of the tty, but that causes a build failure when CONFIG_TTY is disabled: kernel/built-in.o: In function `audit_log_task_info': memremap.c:(.text+0x5e34c): undefined reference to `tty_name' kernel/built-in.o: In function `audit_set_loginuid': memremap.c:(.text+0x63b34): undefined reference to `tty_name' This adds tty_name() to the list of functions that are provided as trivial stubs in that configuration. Signed-off-by: Arnd Bergmann Fixes: db0a6fb5d97a ("audit: add tty field to LOGIN event") Signed-off-by: Paul Moore --- include/linux/tty.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index d9fb4b043f56..a93cce297832 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -371,6 +371,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); +extern const char *tty_name(const struct tty_struct *tty); #else static inline void console_init(void) { } @@ -391,6 +392,8 @@ static inline struct tty_struct *get_current_tty(void) /* tty_io.c */ static inline int __init tty_init(void) { return 0; } +static inline const char *tty_name(const struct tty_struct *tty) +{ return "(none)"; } #endif extern struct ktermios tty_std_termios; @@ -415,7 +418,6 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) return tty; } -extern const char *tty_name(const struct tty_struct *tty); extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int __tty_check_change(struct tty_struct *tty, int sig); -- cgit v1.2.3 From 0224a4a30b57385a60065aa598181868881d8fc6 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 27 Apr 2016 14:04:20 +0300 Subject: device property: Avoid potential dereferences of invalid pointers Since fwnode may hold ERR_PTR(-ENODEV) or it may be NULL, the fwnode type checks is_of_node(), is_acpi_node() and is is_pset_node() need to consider it. Using IS_ERR_OR_NULL() to check it. Fixes: 0d67e0fa1664 (device property: fix for a case of use-after-free) Reported-by: Dan Carpenter Signed-off-by: Heikki Krogerus [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 2 +- include/acpi/acpi_bus.h | 4 ++-- include/linux/of.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/property.c b/drivers/base/property.c index 9b1a65debd49..7f692accdc90 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -21,7 +21,7 @@ static inline bool is_pset_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_PDATA; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_PDATA; } static inline struct property_set *to_pset_node(struct fwnode_handle *fwnode) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 14362a84c78e..3a932501d690 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -394,13 +394,13 @@ struct acpi_data_node { static inline bool is_acpi_node(struct fwnode_handle *fwnode) { - return fwnode && (fwnode->type == FWNODE_ACPI + return !IS_ERR_OR_NULL(fwnode) && (fwnode->type == FWNODE_ACPI || fwnode->type == FWNODE_ACPI_DATA); } static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_ACPI; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI; } static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..31758036787c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -133,7 +133,7 @@ void of_core_init(void); static inline bool is_of_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_OF; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF; } static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) -- cgit v1.2.3 From 501e7ef569f4ea2dc7e50773cf6a5d757c94f9b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Apr 2016 15:30:07 -0700 Subject: net-rfs: fix false sharing accessing sd->input_queue_head sd->input_queue_head is incremented for each processed packet in process_backlog(), and read from other cpus performing Out Of Order avoidance in get_rps_cpu() Moving this field in a separate cache line keeps it mostly hot for the cpu in process_backlog(), as other cpus will only read it. In a stress test, process_backlog() was consuming 6.80 % of cpu cycles, and the patch reduced the cost to 0.65 % Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 18d8394f2e5d..934ca866562d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2747,11 +2747,15 @@ struct softnet_data { struct sk_buff *completion_queue; #ifdef CONFIG_RPS - /* Elements below can be accessed between CPUs for RPS */ + /* input_queue_head should be written by cpu owning this struct, + * and only read by other cpus. Worth using a cache line. + */ + unsigned int input_queue_head ____cacheline_aligned_in_smp; + + /* Elements below can be accessed between CPUs for RPS/RFS */ struct call_single_data csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; - unsigned int input_queue_head; unsigned int input_queue_tail; #endif unsigned int dropped; -- cgit v1.2.3 From c5b591e96db9d99d0126acf93f24e1fb8b368343 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:33 +0100 Subject: efi: Get rid of the EFI_SYSTEM_TABLES status bit The EFI_SYSTEM_TABLES status bit is set by all EFI supporting architectures upon discovery of the EFI system table, but the bit is never tested in any code we have in the tree. So remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Leif Lindholm Cc: Luck, Tony Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/ia64/kernel/efi.c | 2 -- arch/x86/platform/efi/efi.c | 2 -- drivers/firmware/efi/arm-runtime.c | 1 - include/linux/efi.h | 1 - 4 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 300dac3702f1..bf0865cd438a 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -531,8 +531,6 @@ efi_init (void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - palo_phys = EFI_INVALID_TABLE_ADDR; if (efi_config_init(arch_tables) != 0) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 994a7df84a7b..df393eab0e50 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -352,8 +352,6 @@ static int __init efi_systab_init(void *phys) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff); - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - return 0; } diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 6ae21e41a429..16c7d2a71156 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -105,7 +105,6 @@ static int __init arm_enable_runtime_services(void) pr_err("Failed to remap EFI System Table\n"); return -ENOMEM; } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); if (!efi_virtmap_init()) { pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); diff --git a/include/linux/efi.h b/include/linux/efi.h index 1626474567ac..1545098b0565 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1000,7 +1000,6 @@ extern int __init efi_setup_pcdp_console(char *); * possible, remove EFI-related code altogether. */ #define EFI_BOOT 0 /* Were we booted from EFI? */ -#define EFI_SYSTEM_TABLES 1 /* Can we use EFI system tables? */ #define EFI_CONFIG_TABLES 2 /* Can we use EFI config tables? */ #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ -- cgit v1.2.3 From 78ce248faa3c46e24e9bd42db3ab3650659f16dd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:38 +0100 Subject: efi: Iterate over efi.memmap in for_each_efi_memory_desc() Most of the users of for_each_efi_memory_desc() are equally happy iterating over the EFI memory map in efi.memmap instead of 'memmap', since the former is usually a pointer to the latter. For those users that want to specify an EFI memory map other than efi.memmap, that can be done using for_each_efi_memory_desc_in_map(). One such example is in the libstub code where the firmware is queried directly for the memory map, it gets iterated over, and then freed. This change goes part of the way toward deleting the global 'memmap' variable, which is not universally available on all architectures (notably IA64) and is rather poorly named. Signed-off-by: Matt Fleming Reviewed-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Leif Lindholm Cc: Mark Salter Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-7-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 43 ++++++++------------------ arch/x86/platform/efi/efi_64.c | 10 ++---- arch/x86/platform/efi/quirks.c | 10 +++--- drivers/firmware/efi/arm-init.c | 4 +-- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/efi.c | 6 +--- drivers/firmware/efi/fake_mem.c | 3 +- drivers/firmware/efi/libstub/efi-stub-helper.c | 6 ++-- include/linux/efi.h | 11 ++++++- 9 files changed, 39 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index df393eab0e50..6f499819a27f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -119,11 +119,10 @@ void efi_get_time(struct timespec *now) void __init efi_find_mirror(void) { - void *p; + efi_memory_desc_t *md; u64 mirror_size = 0, total_size = 0; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; @@ -146,10 +145,9 @@ void __init efi_find_mirror(void) static void __init do_add_efi_memmap(void) { - void *p; + efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; int e820_type; @@ -226,17 +224,13 @@ void __init efi_print_memmap(void) { #ifdef EFI_DEBUG efi_memory_desc_t *md; - void *p; - int i; + int i = 0; - for (p = memmap.map, i = 0; - p < memmap.map_end; - p += memmap.desc_size, i++) { + for_each_efi_memory_desc(md) { char buf[64]; - md = p; pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", - i, efi_md_typeattr_format(buf, sizeof(buf), md), + i++, efi_md_typeattr_format(buf, sizeof(buf), md), md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, (md->num_pages >> (20 - EFI_PAGE_SHIFT))); @@ -550,12 +544,9 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable) void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; - void *p; /* Make EFI runtime service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - + for_each_efi_memory_desc(md) { if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; @@ -602,12 +593,10 @@ void __init old_map_region(efi_memory_desc_t *md) /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { - void *p; efi_memory_desc_t *md, *prev_md = NULL; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + for_each_efi_memory_desc(md) { u64 prev_size; - md = p; if (!prev_md) { prev_md = md; @@ -650,15 +639,13 @@ static void __init save_runtime_map(void) { #ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; - void *tmp, *p, *q = NULL; + void *tmp, *q = NULL; int count = 0; if (efi_enabled(EFI_OLD_MEMMAP)) return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - + for_each_efi_memory_desc(md) { if (!(md->attribute & EFI_MEMORY_RUNTIME) || (md->type == EFI_BOOT_SERVICES_CODE) || (md->type == EFI_BOOT_SERVICES_DATA)) @@ -814,7 +801,6 @@ static void __init kexec_enter_virtual_mode(void) #ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; unsigned int num_pages; - void *p; efi.systab = NULL; @@ -838,8 +824,7 @@ static void __init kexec_enter_virtual_mode(void) * Map efi regions which were passed via setup_data. The virt_addr is a * fixed addr which was used in first kernel of a kexec boot. */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + for_each_efi_memory_desc(md) { efi_map_region_fixed(md); /* FIXME: add error handling */ get_systab_virt_addr(md); } @@ -1009,13 +994,11 @@ void __init efi_enter_virtual_mode(void) u32 efi_mem_type(unsigned long phys_addr) { efi_memory_desc_t *md; - void *p; if (!efi_enabled(EFI_MEMMAP)) return 0; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + for_each_efi_memory_desc(md) { if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)))) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 49e4dd4a1f58..6e7242be1c87 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -55,14 +55,12 @@ struct efi_scratch efi_scratch; static void __init early_code_mapping_set_exec(int executable) { efi_memory_desc_t *md; - void *p; if (!(__supported_pte_mask & _PAGE_NX)) return; /* Make EFI service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; + for_each_efi_memory_desc(md) { if (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_BOOT_SERVICES_CODE) efi_set_executable(md, executable); @@ -253,7 +251,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ - for_each_efi_memory_desc(&memmap, md) { + for_each_efi_memory_desc(md) { if (md->type != EFI_CONVENTIONAL_MEMORY && md->type != EFI_LOADER_DATA && md->type != EFI_LOADER_CODE) @@ -398,7 +396,6 @@ void __init efi_runtime_update_mappings(void) unsigned long pfn; pgd_t *pgd = efi_pgd; efi_memory_desc_t *md; - void *p; if (efi_enabled(EFI_OLD_MEMMAP)) { if (__supported_pte_mask & _PAGE_NX) @@ -409,9 +406,8 @@ void __init efi_runtime_update_mappings(void) if (!efi_enabled(EFI_NX_PE_DATA)) return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + for_each_efi_memory_desc(md) { unsigned long pf = 0; - md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index ab50ada1d56e..097cb09d917b 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -195,10 +195,9 @@ static bool can_free_region(u64 start, u64 size) */ void __init efi_reserve_boot_services(void) { - void *p; + efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; bool already_reserved; @@ -250,10 +249,9 @@ void __init efi_reserve_boot_services(void) void __init efi_free_boot_services(void) { - void *p; + efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 008ed1993b72..d5f6b0ca521e 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -40,7 +40,7 @@ static phys_addr_t efi_to_phys(unsigned long addr) { efi_memory_desc_t *md; - for_each_efi_memory_desc(&memmap, md) { + for_each_efi_memory_desc_in_map(&memmap, md) { if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; if (md->virt_addr == 0) @@ -145,7 +145,7 @@ static __init void reserve_regions(void) if (efi_enabled(EFI_DBG)) pr_info("Processing EFI memory map:\n"); - for_each_efi_memory_desc(&memmap, md) { + for_each_efi_memory_desc_in_map(&memmap, md) { paddr = md->phys_addr; npages = md->num_pages; diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 771750df6b7d..1cfbfaf57a2d 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -48,7 +48,7 @@ static bool __init efi_virtmap_init(void) init_new_context(NULL, &efi_mm); systab_found = false; - for_each_efi_memory_desc(&memmap, md) { + for_each_efi_memory_desc(md) { phys_addr_t phys = md->phys_addr; int ret; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3a69ed5ecfcb..4b533ce73374 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -620,16 +620,12 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, */ u64 __weak efi_mem_attributes(unsigned long phys_addr) { - struct efi_memory_map *map; efi_memory_desc_t *md; - void *p; if (!efi_enabled(EFI_MEMMAP)) return 0; - map = efi.memmap; - for (p = map->map; p < map->map_end; p += map->desc_size) { - md = p; + for_each_efi_memory_desc(md) { if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)))) diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index ed3a854950cc..f55b75b2e1f4 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -68,8 +68,7 @@ void __init efi_fake_memmap(void) return; /* count up the number of EFI memory descriptor */ - for (old = memmap.map; old < memmap.map_end; old += memmap.desc_size) { - md = old; + for_each_efi_memory_desc(md) { start = md->phys_addr; end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 29ed2f9b218c..3bd127f95315 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -125,10 +125,12 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg) map.map_end = map.map + map_size; - for_each_efi_memory_desc(&map, md) - if (md->attribute & EFI_MEMORY_WB) + for_each_efi_memory_desc_in_map(&map, md) { + if (md->attribute & EFI_MEMORY_WB) { if (membase > md->phys_addr) membase = md->phys_addr; + } + } efi_call_early(free_pool, map.map); diff --git a/include/linux/efi.h b/include/linux/efi.h index 1545098b0565..17ef4471e603 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -958,11 +958,20 @@ static inline void efi_fake_memmap(void) { } #endif /* Iterate through an efi_memory_map */ -#define for_each_efi_memory_desc(m, md) \ +#define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ (md) = (void *)(md) + (m)->desc_size) +/** + * for_each_efi_memory_desc - iterate over descriptors in efi.memmap + * @md: the efi_memory_desc_t * iterator + * + * Once the loop finishes @md must not be accessed. + */ +#define for_each_efi_memory_desc(md) \ + for_each_efi_memory_desc_in_map(efi.memmap, md) + /* * Format an EFI memory descriptor's type and attributes to a user-provided * character buffer, as per snprintf(), and return the buffer. -- cgit v1.2.3 From 884f4f66ffd6ffe632f3a8be4e6d10a858afdc37 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:39 +0100 Subject: efi: Remove global 'memmap' EFI memory map Abolish the poorly named EFI memory map, 'memmap'. It is shadowed by a bunch of local definitions in various files and having two ways to access the EFI memory map ('efi.memmap' vs. 'memmap') is rather confusing. Furthermore, IA64 doesn't even provide this global object, which has caused issues when trying to write generic EFI memmap code. Replace all occurrences with efi.memmap, and convert the remaining iterator code to use for_each_efi_mem_desc(). Signed-off-by: Matt Fleming Reviewed-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Luck, Tony Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 84 +++++++++++++++++++++----------------- drivers/firmware/efi/arm-init.c | 20 ++++----- drivers/firmware/efi/arm-runtime.c | 12 +++--- drivers/firmware/efi/efi.c | 2 +- drivers/firmware/efi/fake_mem.c | 40 +++++++++--------- include/linux/efi.h | 5 +-- 6 files changed, 85 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 6f499819a27f..88d2fb2cb3ef 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -56,8 +56,6 @@ #define EFI_DEBUG -struct efi_memory_map memmap; - static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; @@ -207,15 +205,13 @@ int __init efi_memblock_x86_reserve_range(void) #else pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); #endif - memmap.phys_map = pmap; - memmap.nr_map = e->efi_memmap_size / + efi.memmap.phys_map = pmap; + efi.memmap.nr_map = e->efi_memmap_size / e->efi_memdesc_size; - memmap.desc_size = e->efi_memdesc_size; - memmap.desc_version = e->efi_memdesc_version; - - memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); + efi.memmap.desc_size = e->efi_memdesc_size; + efi.memmap.desc_version = e->efi_memdesc_version; - efi.memmap = &memmap; + memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size); return 0; } @@ -240,10 +236,14 @@ void __init efi_print_memmap(void) void __init efi_unmap_memmap(void) { + unsigned long size; + clear_bit(EFI_MEMMAP, &efi.flags); - if (memmap.map) { - early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; + + size = efi.memmap.nr_map * efi.memmap.desc_size; + if (efi.memmap.map) { + early_memunmap(efi.memmap.map, size); + efi.memmap.map = NULL; } } @@ -432,17 +432,22 @@ static int __init efi_runtime_init(void) static int __init efi_memmap_init(void) { + unsigned long addr, size; + if (efi_enabled(EFI_PARAVIRT)) return 0; /* Map the EFI memory map */ - memmap.map = early_memremap((unsigned long)memmap.phys_map, - memmap.nr_map * memmap.desc_size); - if (memmap.map == NULL) { + size = efi.memmap.nr_map * efi.memmap.desc_size; + addr = (unsigned long)efi.memmap.phys_map; + + efi.memmap.map = early_memremap(addr, size); + if (efi.memmap.map == NULL) { pr_err("Could not map the memory map!\n"); return -ENOMEM; } - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + + efi.memmap.map_end = efi.memmap.map + size; if (add_efi_memmap) do_add_efi_memmap(); @@ -638,6 +643,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) static void __init save_runtime_map(void) { #ifdef CONFIG_KEXEC_CORE + unsigned long desc_size; efi_memory_desc_t *md; void *tmp, *q = NULL; int count = 0; @@ -645,21 +651,23 @@ static void __init save_runtime_map(void) if (efi_enabled(EFI_OLD_MEMMAP)) return; + desc_size = efi.memmap.desc_size; + for_each_efi_memory_desc(md) { if (!(md->attribute & EFI_MEMORY_RUNTIME) || (md->type == EFI_BOOT_SERVICES_CODE) || (md->type == EFI_BOOT_SERVICES_DATA)) continue; - tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); + tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL); if (!tmp) goto out; q = tmp; - memcpy(q + count * memmap.desc_size, md, memmap.desc_size); + memcpy(q + count * desc_size, md, desc_size); count++; } - efi_runtime_map_setup(q, count, memmap.desc_size); + efi_runtime_map_setup(q, count, desc_size); return; out: @@ -699,10 +707,10 @@ static inline void *efi_map_next_entry_reverse(void *entry) { /* Initial call */ if (!entry) - return memmap.map_end - memmap.desc_size; + return efi.memmap.map_end - efi.memmap.desc_size; - entry -= memmap.desc_size; - if (entry < memmap.map) + entry -= efi.memmap.desc_size; + if (entry < efi.memmap.map) return NULL; return entry; @@ -744,10 +752,10 @@ static void *efi_map_next_entry(void *entry) /* Initial call */ if (!entry) - return memmap.map; + return efi.memmap.map; - entry += memmap.desc_size; - if (entry >= memmap.map_end) + entry += efi.memmap.desc_size; + if (entry >= efi.memmap.map_end) return NULL; return entry; @@ -761,8 +769,11 @@ static void * __init efi_map_regions(int *count, int *pg_shift) { void *p, *new_memmap = NULL; unsigned long left = 0; + unsigned long desc_size; efi_memory_desc_t *md; + desc_size = efi.memmap.desc_size; + p = NULL; while ((p = efi_map_next_entry(p))) { md = p; @@ -777,7 +788,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift) efi_map_region(md); get_systab_virt_addr(md); - if (left < memmap.desc_size) { + if (left < desc_size) { new_memmap = realloc_pages(new_memmap, *pg_shift); if (!new_memmap) return NULL; @@ -786,10 +797,9 @@ static void * __init efi_map_regions(int *count, int *pg_shift) (*pg_shift)++; } - memcpy(new_memmap + (*count * memmap.desc_size), md, - memmap.desc_size); + memcpy(new_memmap + (*count * desc_size), md, desc_size); - left -= memmap.desc_size; + left -= desc_size; (*count)++; } @@ -833,10 +843,10 @@ static void __init kexec_enter_virtual_mode(void) BUG_ON(!efi.systab); - num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE); + num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE); num_pages >>= PAGE_SHIFT; - if (efi_setup_page_tables(memmap.phys_map, num_pages)) { + if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) { clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; } @@ -920,16 +930,16 @@ static void __init __efi_enter_virtual_mode(void) if (efi_is_native()) { status = phys_efi_set_virtual_address_map( - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, + efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, (efi_memory_desc_t *)__pa(new_memmap)); } else { status = efi_thunk_set_virtual_address_map( efi_phys.set_virtual_address_map, - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, + efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, (efi_memory_desc_t *)__pa(new_memmap)); } diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index d5f6b0ca521e..90a9b473e45c 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -20,8 +20,6 @@ #include -struct efi_memory_map memmap; - u64 efi_system_table; static int __init is_normal_ram(efi_memory_desc_t *md) @@ -40,7 +38,7 @@ static phys_addr_t efi_to_phys(unsigned long addr) { efi_memory_desc_t *md; - for_each_efi_memory_desc_in_map(&memmap, md) { + for_each_efi_memory_desc(md) { if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; if (md->virt_addr == 0) @@ -145,7 +143,7 @@ static __init void reserve_regions(void) if (efi_enabled(EFI_DBG)) pr_info("Processing EFI memory map:\n"); - for_each_efi_memory_desc_in_map(&memmap, md) { + for_each_efi_memory_desc(md) { paddr = md->phys_addr; npages = md->num_pages; @@ -186,9 +184,9 @@ void __init efi_init(void) efi_system_table = params.system_table; - memmap.phys_map = params.mmap; - memmap.map = early_memremap_ro(params.mmap, params.mmap_size); - if (memmap.map == NULL) { + efi.memmap.phys_map = params.mmap; + efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size); + if (efi.memmap.map == NULL) { /* * If we are booting via UEFI, the UEFI memory map is the only * description of memory we have, so there is little point in @@ -196,15 +194,15 @@ void __init efi_init(void) */ panic("Unable to map EFI memory map.\n"); } - memmap.map_end = memmap.map + params.mmap_size; - memmap.desc_size = params.desc_size; - memmap.desc_version = params.desc_ver; + efi.memmap.map_end = efi.memmap.map + params.mmap_size; + efi.memmap.desc_size = params.desc_size; + efi.memmap.desc_version = params.desc_ver; if (uefi_init() < 0) return; reserve_regions(); - early_memunmap(memmap.map, params.mmap_size); + early_memunmap(efi.memmap.map, params.mmap_size); if (IS_ENABLED(CONFIG_ARM)) { /* diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 1cfbfaf57a2d..55a9ea041068 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -103,15 +103,15 @@ static int __init arm_enable_runtime_services(void) pr_info("Remapping and enabling EFI services.\n"); - mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache(memmap.phys_map, - mapsize); - if (!memmap.map) { + mapsize = efi.memmap.map_end - efi.memmap.map; + + efi.memmap.map = (__force void *)ioremap_cache(efi.memmap.phys_map, + mapsize); + if (!efi.memmap.map) { pr_err("Failed to remap EFI memory map\n"); return -ENOMEM; } - memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; + efi.memmap.map_end = efi.memmap.map + mapsize; if (!efi_virtmap_init()) { pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n"); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4b533ce73374..f7d36c6cc1ad 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -256,7 +256,7 @@ subsys_initcall(efisubsys_init); */ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) { - struct efi_memory_map *map = efi.memmap; + struct efi_memory_map *map = &efi.memmap; phys_addr_t p, e; if (!efi_enabled(EFI_MEMMAP)) { diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index f55b75b2e1f4..48430aba13c1 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -57,7 +57,7 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) void __init efi_fake_memmap(void) { u64 start, end, m_start, m_end, m_attr; - int new_nr_map = memmap.nr_map; + int new_nr_map = efi.memmap.nr_map; efi_memory_desc_t *md; phys_addr_t new_memmap_phy; void *new_memmap; @@ -94,25 +94,25 @@ void __init efi_fake_memmap(void) } /* allocate memory for new EFI memmap */ - new_memmap_phy = memblock_alloc(memmap.desc_size * new_nr_map, + new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map, PAGE_SIZE); if (!new_memmap_phy) return; /* create new EFI memmap */ new_memmap = early_memremap(new_memmap_phy, - memmap.desc_size * new_nr_map); + efi.memmap.desc_size * new_nr_map); if (!new_memmap) { - memblock_free(new_memmap_phy, memmap.desc_size * new_nr_map); + memblock_free(new_memmap_phy, efi.memmap.desc_size * new_nr_map); return; } - for (old = memmap.map, new = new_memmap; - old < memmap.map_end; - old += memmap.desc_size, new += memmap.desc_size) { + for (old = efi.memmap.map, new = new_memmap; + old < efi.memmap.map_end; + old += efi.memmap.desc_size, new += efi.memmap.desc_size) { /* copy original EFI memory descriptor */ - memcpy(new, old, memmap.desc_size); + memcpy(new, old, efi.memmap.desc_size); md = new; start = md->phys_addr; end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; @@ -133,8 +133,8 @@ void __init efi_fake_memmap(void) md->num_pages = (m_end - md->phys_addr + 1) >> EFI_PAGE_SHIFT; /* latter part */ - new += memmap.desc_size; - memcpy(new, old, memmap.desc_size); + new += efi.memmap.desc_size; + memcpy(new, old, efi.memmap.desc_size); md = new; md->phys_addr = m_end + 1; md->num_pages = (end - md->phys_addr + 1) >> @@ -146,16 +146,16 @@ void __init efi_fake_memmap(void) md->num_pages = (m_start - md->phys_addr) >> EFI_PAGE_SHIFT; /* middle part */ - new += memmap.desc_size; - memcpy(new, old, memmap.desc_size); + new += efi.memmap.desc_size; + memcpy(new, old, efi.memmap.desc_size); md = new; md->attribute |= m_attr; md->phys_addr = m_start; md->num_pages = (m_end - m_start + 1) >> EFI_PAGE_SHIFT; /* last part */ - new += memmap.desc_size; - memcpy(new, old, memmap.desc_size); + new += efi.memmap.desc_size; + memcpy(new, old, efi.memmap.desc_size); md = new; md->phys_addr = m_end + 1; md->num_pages = (end - m_end) >> @@ -168,8 +168,8 @@ void __init efi_fake_memmap(void) md->num_pages = (m_start - md->phys_addr) >> EFI_PAGE_SHIFT; /* latter part */ - new += memmap.desc_size; - memcpy(new, old, memmap.desc_size); + new += efi.memmap.desc_size; + memcpy(new, old, efi.memmap.desc_size); md = new; md->phys_addr = m_start; md->num_pages = (end - md->phys_addr + 1) >> @@ -181,10 +181,10 @@ void __init efi_fake_memmap(void) /* swap into new EFI memmap */ efi_unmap_memmap(); - memmap.map = new_memmap; - memmap.phys_map = new_memmap_phy; - memmap.nr_map = new_nr_map; - memmap.map_end = memmap.map + memmap.nr_map * memmap.desc_size; + efi.memmap.map = new_memmap; + efi.memmap.phys_map = new_memmap_phy; + efi.memmap.nr_map = new_nr_map; + efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size; set_bit(EFI_MEMMAP, &efi.flags); /* print new EFI memmap */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 17ef4471e603..c2c0da49876e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -883,7 +883,7 @@ extern struct efi { efi_get_next_high_mono_count_t *get_next_high_mono_count; efi_reset_system_t *reset_system; efi_set_virtual_address_map_t *set_virtual_address_map; - struct efi_memory_map *memmap; + struct efi_memory_map memmap; unsigned long flags; } efi; @@ -945,7 +945,6 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params); -extern struct efi_memory_map memmap; extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; @@ -970,7 +969,7 @@ static inline void efi_fake_memmap(void) { } * Once the loop finishes @md must not be accessed. */ #define for_each_efi_memory_desc(md) \ - for_each_efi_memory_desc_in_map(efi.memmap, md) + for_each_efi_memory_desc_in_map(&efi.memmap, md) /* * Format an EFI memory descriptor's type and attributes to a user-provided -- cgit v1.2.3 From a604af075a3226adaff84b7026876f0c6dfe9f52 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:44 +0100 Subject: efi: Add support for the EFI_MEMORY_ATTRIBUTES_TABLE config table This declares the GUID and struct typedef for the new memory attributes table which contains the permissions that can be used to apply stricter permissions to UEFI Runtime Services memory regions. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Catalin Marinas Cc: Leif Lindholm Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-13-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 2 ++ include/linux/efi.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f7d36c6cc1ad..583e647912a5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -43,6 +43,7 @@ struct efi __read_mostly efi = { .config_table = EFI_INVALID_TABLE_ADDR, .esrt = EFI_INVALID_TABLE_ADDR, .properties_table = EFI_INVALID_TABLE_ADDR, + .mem_attr_table = EFI_INVALID_TABLE_ADDR, }; EXPORT_SYMBOL(efi); @@ -338,6 +339,7 @@ static __initdata efi_config_table_type_t common_tables[] = { {UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga}, {EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt}, {EFI_PROPERTIES_TABLE_GUID, "PROP", &efi.properties_table}, + {EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table}, {NULL_GUID, NULL, NULL}, }; diff --git a/include/linux/efi.h b/include/linux/efi.h index c2c0da49876e..81af5feba1f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -623,6 +623,10 @@ void efi_native_runtime_setup(void); EFI_GUID(0x3152bca5, 0xeade, 0x433d, \ 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) +#define EFI_MEMORY_ATTRIBUTES_TABLE_GUID \ + EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \ + 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) + typedef struct { efi_guid_t guid; u64 table; @@ -847,6 +851,14 @@ typedef struct { #define EFI_INVALID_TABLE_ADDR (~0UL) +typedef struct { + u32 version; + u32 num_entries; + u32 desc_size; + u32 reserved; + efi_memory_desc_t entry[0]; +} efi_memory_attributes_table_t; + /* * All runtime access to EFI goes through this structure: */ @@ -868,6 +880,7 @@ extern struct efi { unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ unsigned long properties_table; /* properties table */ + unsigned long mem_attr_table; /* memory attributes table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From 10f0d2f57705350bbbe5f28e9292ae3905823c3c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:45 +0100 Subject: efi: Implement generic support for the Memory Attributes table This implements shared support for discovering the presence of the Memory Attributes table, and for parsing and validating its contents. The table is validated against the construction rules in the UEFI spec. Since this is a new table, it makes sense to complain if we encounter a table that does not follow those rules. The parsing and validation routine takes a callback that can be specified per architecture, that gets passed each unique validated region, with the virtual address retrieved from the ordinary memory map. Signed-off-by: Ard Biesheuvel [ Trim pr_*() strings to 80 cols and use EFI consistently. ] Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Catalin Marinas Cc: Leif Lindholm Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-14-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/Makefile | 2 +- drivers/firmware/efi/memattr.c | 182 +++++++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 13 +++ 3 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 drivers/firmware/efi/memattr.c (limited to 'include/linux') diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 62e654f255f4..d5be62399130 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -9,7 +9,7 @@ # KASAN_SANITIZE_runtime-wrappers.o := n -obj-$(CONFIG_EFI) += efi.o vars.o reboot.o +obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_ESRT) += esrt.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c new file mode 100644 index 000000000000..236004b9a50d --- /dev/null +++ b/drivers/firmware/efi/memattr.c @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2016 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) "efi: memattr: " fmt + +#include +#include +#include +#include + +#include + +static int __initdata tbl_size; + +/* + * Reserve the memory associated with the Memory Attributes configuration + * table, if it exists. + */ +int __init efi_memattr_init(void) +{ + efi_memory_attributes_table_t *tbl; + + if (efi.mem_attr_table == EFI_INVALID_TABLE_ADDR) + return 0; + + tbl = early_memremap(efi.mem_attr_table, sizeof(*tbl)); + if (!tbl) { + pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n", + efi.mem_attr_table); + return -ENOMEM; + } + + if (tbl->version > 1) { + pr_warn("Unexpected EFI Memory Attributes table version %d\n", + tbl->version); + goto unmap; + } + + tbl_size = sizeof(*tbl) + tbl->num_entries * tbl->desc_size; + memblock_reserve(efi.mem_attr_table, tbl_size); + +unmap: + early_memunmap(tbl, sizeof(*tbl)); + return 0; +} + +/* + * Returns a copy @out of the UEFI memory descriptor @in if it is covered + * entirely by a UEFI memory map entry with matching attributes. The virtual + * address of @out is set according to the matching entry that was found. + */ +static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out) +{ + u64 in_paddr = in->phys_addr; + u64 in_size = in->num_pages << EFI_PAGE_SHIFT; + efi_memory_desc_t *md; + + *out = *in; + + if (in->type != EFI_RUNTIME_SERVICES_CODE && + in->type != EFI_RUNTIME_SERVICES_DATA) { + pr_warn("Entry type should be RuntimeServiceCode/Data\n"); + return false; + } + + if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) { + pr_warn("Entry attributes invalid: RO and XP bits both cleared\n"); + return false; + } + + if (PAGE_SIZE > EFI_PAGE_SIZE && + (!PAGE_ALIGNED(in->phys_addr) || + !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) { + /* + * Since arm64 may execute with page sizes of up to 64 KB, the + * UEFI spec mandates that RuntimeServices memory regions must + * be 64 KB aligned. We need to validate this here since we will + * not be able to tighten permissions on such regions without + * affecting adjacent regions. + */ + pr_warn("Entry address region misaligned\n"); + return false; + } + + for_each_efi_memory_desc(md) { + u64 md_paddr = md->phys_addr; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (md->virt_addr == 0) { + /* no virtual mapping has been installed by the stub */ + break; + } + + if (md_paddr > in_paddr || (in_paddr - md_paddr) >= md_size) + continue; + + /* + * This entry covers the start of @in, check whether + * it covers the end as well. + */ + if (md_paddr + md_size < in_paddr + in_size) { + pr_warn("Entry covers multiple EFI memory map regions\n"); + return false; + } + + if (md->type != in->type) { + pr_warn("Entry type deviates from EFI memory map region type\n"); + return false; + } + + out->virt_addr = in_paddr + (md->virt_addr - md_paddr); + + return true; + } + + pr_warn("No matching entry found in the EFI memory map\n"); + return false; +} + +/* + * To be called after the EFI page tables have been populated. If a memory + * attributes table is available, its contents will be used to update the + * mappings with tightened permissions as described by the table. + * This requires the UEFI memory map to have already been populated with + * virtual addresses. + */ +int __init efi_memattr_apply_permissions(struct mm_struct *mm, + efi_memattr_perm_setter fn) +{ + efi_memory_attributes_table_t *tbl; + int i, ret; + + if (tbl_size <= sizeof(*tbl)) + return 0; + + /* + * We need the EFI memory map to be setup so we can use it to + * lookup the virtual addresses of all entries in the of EFI + * Memory Attributes table. If it isn't available, this + * function should not be called. + */ + if (WARN_ON(!efi_enabled(EFI_MEMMAP))) + return 0; + + tbl = memremap(efi.mem_attr_table, tbl_size, MEMREMAP_WB); + if (!tbl) { + pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n", + efi.mem_attr_table); + return -ENOMEM; + } + + if (efi_enabled(EFI_DBG)) + pr_info("Processing EFI Memory Attributes table:\n"); + + for (i = ret = 0; ret == 0 && i < tbl->num_entries; i++) { + efi_memory_desc_t md; + unsigned long size; + bool valid; + char buf[64]; + + valid = entry_is_valid((void *)tbl->entry + i * tbl->desc_size, + &md); + size = md.num_pages << EFI_PAGE_SHIFT; + if (efi_enabled(EFI_DBG) || !valid) + pr_info("%s 0x%012llx-0x%012llx %s\n", + valid ? "" : "!", md.phys_addr, + md.phys_addr + size - 1, + efi_md_typeattr_format(buf, sizeof(buf), &md)); + + if (valid) + ret = fn(mm, &md); + } + memunmap(tbl); + return ret; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 81af5feba1f7..e29a31d0fc35 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -969,6 +969,19 @@ extern void __init efi_fake_memmap(void); static inline void efi_fake_memmap(void) { } #endif +/* + * efi_memattr_perm_setter - arch specific callback function passed into + * efi_memattr_apply_permissions() that updates the + * mapping permissions described by the second + * argument in the page tables referred to by the + * first argument. + */ +typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *); + +extern int efi_memattr_init(void); +extern int efi_memattr_apply_permissions(struct mm_struct *mm, + efi_memattr_perm_setter fn); + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ -- cgit v1.2.3 From 2c23b73c2d0249c499c4784b6db08dcfc6b7b3b0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:48 +0100 Subject: x86/efi: Prepare GOP handling code for reuse as generic code In preparation of moving this code to drivers/firmware/efi and reusing it on ARM and arm64, apply any changes that will be required to make this code build for other architectures. This should make it easier to track down problems that this move may cause to its operation on x86. Note that the generic version uses slightly different ways of casting the protocol methods and some other variables to the correct types, since such method calls are not loosely typed on ARM and arm64 as they are on x86. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: David Herrmann Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-17-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 58 ++++++++++++++++++++++++---------------- arch/x86/boot/compressed/eboot.h | 4 +++ arch/x86/include/asm/efi.h | 5 ++++ include/linux/efi.h | 5 ++++ 4 files changed, 49 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 583d539a4197..10516e22fdcb 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -622,19 +622,22 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, } static efi_status_t -__gop_query32(struct efi_graphics_output_protocol_32 *gop32, +__gop_query32(efi_system_table_t *sys_table_arg, + struct efi_graphics_output_protocol_32 *gop32, struct efi_graphics_output_mode_info **info, unsigned long *size, u64 *fb_base) { struct efi_graphics_output_protocol_mode_32 *mode; + efi_graphics_output_protocol_query_mode query_mode; efi_status_t status; unsigned long m; m = gop32->mode; mode = (struct efi_graphics_output_protocol_mode_32 *)m; + query_mode = (void *)(unsigned long)gop32->query_mode; - status = efi_early->call(gop32->query_mode, gop32, - mode->mode, size, info); + status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size, + info); if (status != EFI_SUCCESS) return status; @@ -643,8 +646,8 @@ __gop_query32(struct efi_graphics_output_protocol_32 *gop32, } static efi_status_t -setup_gop32(struct screen_info *si, efi_guid_t *proto, - unsigned long size, void **gop_handle) +setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, + efi_guid_t *proto, unsigned long size, void **gop_handle) { struct efi_graphics_output_protocol_32 *gop32, *first_gop; unsigned long nr_gops; @@ -654,7 +657,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; - efi_status_t status; + efi_status_t status = EFI_NOT_FOUND; u32 *handles = (u32 *)(unsigned long)gop_handle; int i; @@ -667,7 +670,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; void *dummy = NULL; - u32 h = handles[i]; + efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; u64 current_fb_base; status = efi_call_early(handle_protocol, h, @@ -680,7 +683,8 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query32(gop32, &info, &size, ¤t_fb_base); + status = __gop_query32(sys_table_arg, gop32, &info, &size, + ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -735,19 +739,22 @@ out: } static efi_status_t -__gop_query64(struct efi_graphics_output_protocol_64 *gop64, +__gop_query64(efi_system_table_t *sys_table_arg, + struct efi_graphics_output_protocol_64 *gop64, struct efi_graphics_output_mode_info **info, unsigned long *size, u64 *fb_base) { struct efi_graphics_output_protocol_mode_64 *mode; + efi_graphics_output_protocol_query_mode query_mode; efi_status_t status; unsigned long m; m = gop64->mode; mode = (struct efi_graphics_output_protocol_mode_64 *)m; + query_mode = (void *)(unsigned long)gop64->query_mode; - status = efi_early->call(gop64->query_mode, gop64, - mode->mode, size, info); + status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size, + info); if (status != EFI_SUCCESS) return status; @@ -756,8 +763,8 @@ __gop_query64(struct efi_graphics_output_protocol_64 *gop64, } static efi_status_t -setup_gop64(struct screen_info *si, efi_guid_t *proto, - unsigned long size, void **gop_handle) +setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, + efi_guid_t *proto, unsigned long size, void **gop_handle) { struct efi_graphics_output_protocol_64 *gop64, *first_gop; unsigned long nr_gops; @@ -767,7 +774,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; - efi_status_t status; + efi_status_t status = EFI_NOT_FOUND; u64 *handles = (u64 *)(unsigned long)gop_handle; int i; @@ -780,7 +787,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; void *dummy = NULL; - u64 h = handles[i]; + efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; u64 current_fb_base; status = efi_call_early(handle_protocol, h, @@ -793,7 +800,8 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query64(gop64, &info, &size, ¤t_fb_base); + status = __gop_query64(sys_table_arg, gop64, &info, &size, + ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -850,8 +858,9 @@ out: /* * See if we have Graphics Output Protocol */ -static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, - unsigned long size) +efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, + struct screen_info *si, efi_guid_t *proto, + unsigned long size) { efi_status_t status; void **gop_handle = NULL; @@ -867,10 +876,13 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, if (status != EFI_SUCCESS) goto free_handle; - if (efi_early->is64) - status = setup_gop64(si, proto, size, gop_handle); - else - status = setup_gop32(si, proto, size, gop_handle); + if (efi_is_64bit()) { + status = setup_gop64(sys_table_arg, si, proto, size, + gop_handle); + } else { + status = setup_gop32(sys_table_arg, si, proto, size, + gop_handle); + } free_handle: efi_call_early(free_pool, gop_handle); @@ -1038,7 +1050,7 @@ void setup_graphics(struct boot_params *boot_params) EFI_LOCATE_BY_PROTOCOL, &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) - status = setup_gop(si, &graphics_proto, size); + status = efi_setup_gop(NULL, si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index d487e727f1ec..4ee5318d7f28 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -85,6 +85,10 @@ struct efi_graphics_output_protocol { struct efi_graphics_output_protocol_mode *mode; }; +typedef efi_status_t (*efi_graphics_output_protocol_query_mode)( + struct efi_graphics_output_protocol *, u32, unsigned long *, + struct efi_graphics_output_mode_info **); + struct efi_uga_draw_protocol_32 { u32 get_mode; u32 set_mode; diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 53748c45e488..10e440770371 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -225,6 +225,11 @@ __pure const struct efi_config *__efi_early(void); #define efi_call_early(f, ...) \ __efi_early()->call(__efi_early()->f, __VA_ARGS__); +#define __efi_call_early(f, ...) \ + __efi_early()->call((unsigned long)f, __VA_ARGS__); + +#define efi_is_64bit() __efi_early()->is64 + extern bool efi_reboot_required(void); #else diff --git a/include/linux/efi.h b/include/linux/efi.h index e29a31d0fc35..c2949909339b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -1352,5 +1353,9 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_status_t efi_parse_options(char *cmdline); +efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, + struct screen_info *si, efi_guid_t *proto, + unsigned long size); + bool efi_runtime_disabled(void); #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From fc37206427ce38eafbeff48099d873235e878450 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:49 +0100 Subject: efi/libstub: Move Graphics Output Protocol handling to generic code The Graphics Output Protocol code executes in the stub, so create a generic version based on the x86 version in libstub so that we can move other archs to it in subsequent patches. The new source file gop.c is added to the libstub build for all architectures, but only wired up for x86. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: David Herrmann Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-18-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 4 +- arch/arm64/include/asm/efi.h | 4 +- arch/x86/boot/compressed/eboot.c | 318 ------------------------------ arch/x86/boot/compressed/eboot.h | 78 -------- drivers/firmware/efi/libstub/Makefile | 2 +- drivers/firmware/efi/libstub/gop.c | 354 ++++++++++++++++++++++++++++++++++ include/linux/efi.h | 81 +++++++- 7 files changed, 441 insertions(+), 400 deletions(-) create mode 100644 drivers/firmware/efi/libstub/gop.c (limited to 'include/linux') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index b0c341d7ceee..dc30d89a1ed3 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -60,7 +60,9 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define __efi_call_early(f, ...) f(__VA_ARGS__) +#define efi_is_64bit() (false) /* * A reasonable upper bound for the uncompressed kernel size is 32 MBytes, diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 4dafc89f373a..af40baa5d53f 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -52,7 +52,9 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ #define MAX_FDT_OFFSET SZ_512M -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define __efi_call_early(f, ...) f(__VA_ARGS__) +#define efi_is_64bit() (true) #define EFI_ALLOC_ALIGN SZ_64K diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 10516e22fdcb..52fef606bc54 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -571,324 +571,6 @@ free_handle: efi_call_early(free_pool, pci_handle); } -static void -setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, - struct efi_pixel_bitmask pixel_info, int pixel_format) -{ - if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { - si->lfb_depth = 32; - si->lfb_linelength = pixels_per_scan_line * 4; - si->red_size = 8; - si->red_pos = 0; - si->green_size = 8; - si->green_pos = 8; - si->blue_size = 8; - si->blue_pos = 16; - si->rsvd_size = 8; - si->rsvd_pos = 24; - } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) { - si->lfb_depth = 32; - si->lfb_linelength = pixels_per_scan_line * 4; - si->red_size = 8; - si->red_pos = 16; - si->green_size = 8; - si->green_pos = 8; - si->blue_size = 8; - si->blue_pos = 0; - si->rsvd_size = 8; - si->rsvd_pos = 24; - } else if (pixel_format == PIXEL_BIT_MASK) { - find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size); - find_bits(pixel_info.green_mask, &si->green_pos, - &si->green_size); - find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size); - find_bits(pixel_info.reserved_mask, &si->rsvd_pos, - &si->rsvd_size); - si->lfb_depth = si->red_size + si->green_size + - si->blue_size + si->rsvd_size; - si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8; - } else { - si->lfb_depth = 4; - si->lfb_linelength = si->lfb_width / 2; - si->red_size = 0; - si->red_pos = 0; - si->green_size = 0; - si->green_pos = 0; - si->blue_size = 0; - si->blue_pos = 0; - si->rsvd_size = 0; - si->rsvd_pos = 0; - } -} - -static efi_status_t -__gop_query32(efi_system_table_t *sys_table_arg, - struct efi_graphics_output_protocol_32 *gop32, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_32 *mode; - efi_graphics_output_protocol_query_mode query_mode; - efi_status_t status; - unsigned long m; - - m = gop32->mode; - mode = (struct efi_graphics_output_protocol_mode_32 *)m; - query_mode = (void *)(unsigned long)gop32->query_mode; - - status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size, - info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - -static efi_status_t -setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, - efi_guid_t *proto, unsigned long size, void **gop_handle) -{ - struct efi_graphics_output_protocol_32 *gop32, *first_gop; - unsigned long nr_gops; - u16 width, height; - u32 pixels_per_scan_line; - u32 ext_lfb_base; - u64 fb_base; - struct efi_pixel_bitmask pixel_info; - int pixel_format; - efi_status_t status = EFI_NOT_FOUND; - u32 *handles = (u32 *)(unsigned long)gop_handle; - int i; - - first_gop = NULL; - gop32 = NULL; - - nr_gops = size / sizeof(u32); - for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_mode_info *info = NULL; - efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; - bool conout_found = false; - void *dummy = NULL; - efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; - u64 current_fb_base; - - status = efi_call_early(handle_protocol, h, - proto, (void **)&gop32); - if (status != EFI_SUCCESS) - continue; - - status = efi_call_early(handle_protocol, h, - &conout_proto, &dummy); - if (status == EFI_SUCCESS) - conout_found = true; - - status = __gop_query32(sys_table_arg, gop32, &info, &size, - ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { - /* - * Systems that use the UEFI Console Splitter may - * provide multiple GOP devices, not all of which are - * backed by real hardware. The workaround is to search - * for a GOP implementing the ConOut protocol, and if - * one isn't found, to just fall back to the first GOP. - */ - width = info->horizontal_resolution; - height = info->vertical_resolution; - pixel_format = info->pixel_format; - pixel_info = info->pixel_information; - pixels_per_scan_line = info->pixels_per_scan_line; - fb_base = current_fb_base; - - /* - * Once we've found a GOP supporting ConOut, - * don't bother looking any further. - */ - first_gop = gop32; - if (conout_found) - break; - } - } - - /* Did we find any GOPs? */ - if (!first_gop) - goto out; - - /* EFI framebuffer */ - si->orig_video_isVGA = VIDEO_TYPE_EFI; - - si->lfb_width = width; - si->lfb_height = height; - si->lfb_base = fb_base; - - ext_lfb_base = (u64)(unsigned long)fb_base >> 32; - if (ext_lfb_base) { - si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - si->ext_lfb_base = ext_lfb_base; - } - - si->pages = 1; - - setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); - - si->lfb_size = si->lfb_linelength * si->lfb_height; - - si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: - return status; -} - -static efi_status_t -__gop_query64(efi_system_table_t *sys_table_arg, - struct efi_graphics_output_protocol_64 *gop64, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_64 *mode; - efi_graphics_output_protocol_query_mode query_mode; - efi_status_t status; - unsigned long m; - - m = gop64->mode; - mode = (struct efi_graphics_output_protocol_mode_64 *)m; - query_mode = (void *)(unsigned long)gop64->query_mode; - - status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size, - info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - -static efi_status_t -setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, - efi_guid_t *proto, unsigned long size, void **gop_handle) -{ - struct efi_graphics_output_protocol_64 *gop64, *first_gop; - unsigned long nr_gops; - u16 width, height; - u32 pixels_per_scan_line; - u32 ext_lfb_base; - u64 fb_base; - struct efi_pixel_bitmask pixel_info; - int pixel_format; - efi_status_t status = EFI_NOT_FOUND; - u64 *handles = (u64 *)(unsigned long)gop_handle; - int i; - - first_gop = NULL; - gop64 = NULL; - - nr_gops = size / sizeof(u64); - for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_mode_info *info = NULL; - efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; - bool conout_found = false; - void *dummy = NULL; - efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; - u64 current_fb_base; - - status = efi_call_early(handle_protocol, h, - proto, (void **)&gop64); - if (status != EFI_SUCCESS) - continue; - - status = efi_call_early(handle_protocol, h, - &conout_proto, &dummy); - if (status == EFI_SUCCESS) - conout_found = true; - - status = __gop_query64(sys_table_arg, gop64, &info, &size, - ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { - /* - * Systems that use the UEFI Console Splitter may - * provide multiple GOP devices, not all of which are - * backed by real hardware. The workaround is to search - * for a GOP implementing the ConOut protocol, and if - * one isn't found, to just fall back to the first GOP. - */ - width = info->horizontal_resolution; - height = info->vertical_resolution; - pixel_format = info->pixel_format; - pixel_info = info->pixel_information; - pixels_per_scan_line = info->pixels_per_scan_line; - fb_base = current_fb_base; - - /* - * Once we've found a GOP supporting ConOut, - * don't bother looking any further. - */ - first_gop = gop64; - if (conout_found) - break; - } - } - - /* Did we find any GOPs? */ - if (!first_gop) - goto out; - - /* EFI framebuffer */ - si->orig_video_isVGA = VIDEO_TYPE_EFI; - - si->lfb_width = width; - si->lfb_height = height; - si->lfb_base = fb_base; - - ext_lfb_base = (u64)(unsigned long)fb_base >> 32; - if (ext_lfb_base) { - si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - si->ext_lfb_base = ext_lfb_base; - } - - si->pages = 1; - - setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); - - si->lfb_size = si->lfb_linelength * si->lfb_height; - - si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: - return status; -} - -/* - * See if we have Graphics Output Protocol - */ -efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, - struct screen_info *si, efi_guid_t *proto, - unsigned long size) -{ - efi_status_t status; - void **gop_handle = NULL; - - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)&gop_handle); - if (status != EFI_SUCCESS) - return status; - - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - proto, NULL, &size, gop_handle); - if (status != EFI_SUCCESS) - goto free_handle; - - if (efi_is_64bit()) { - status = setup_gop64(sys_table_arg, si, proto, size, - gop_handle); - } else { - status = setup_gop32(sys_table_arg, si, proto, size, - gop_handle); - } - -free_handle: - efi_call_early(free_pool, gop_handle); - return status; -} - static efi_status_t setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) { diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 4ee5318d7f28..c0223f1a89d7 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -11,84 +11,6 @@ #define DESC_TYPE_CODE_DATA (1 << 0) -#define EFI_CONSOLE_OUT_DEVICE_GUID \ - EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ - 0x3f, 0xc1, 0x4d) - -#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 -#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 -#define PIXEL_BIT_MASK 2 -#define PIXEL_BLT_ONLY 3 -#define PIXEL_FORMAT_MAX 4 - -struct efi_pixel_bitmask { - u32 red_mask; - u32 green_mask; - u32 blue_mask; - u32 reserved_mask; -}; - -struct efi_graphics_output_mode_info { - u32 version; - u32 horizontal_resolution; - u32 vertical_resolution; - int pixel_format; - struct efi_pixel_bitmask pixel_information; - u32 pixels_per_scan_line; -} __packed; - -struct efi_graphics_output_protocol_mode_32 { - u32 max_mode; - u32 mode; - u32 info; - u32 size_of_info; - u64 frame_buffer_base; - u32 frame_buffer_size; -} __packed; - -struct efi_graphics_output_protocol_mode_64 { - u32 max_mode; - u32 mode; - u64 info; - u64 size_of_info; - u64 frame_buffer_base; - u64 frame_buffer_size; -} __packed; - -struct efi_graphics_output_protocol_mode { - u32 max_mode; - u32 mode; - unsigned long info; - unsigned long size_of_info; - u64 frame_buffer_base; - unsigned long frame_buffer_size; -} __packed; - -struct efi_graphics_output_protocol_32 { - u32 query_mode; - u32 set_mode; - u32 blt; - u32 mode; -}; - -struct efi_graphics_output_protocol_64 { - u64 query_mode; - u64 set_mode; - u64 blt; - u64 mode; -}; - -struct efi_graphics_output_protocol { - void *query_mode; - unsigned long set_mode; - unsigned long blt; - struct efi_graphics_output_protocol_mode *mode; -}; - -typedef efi_status_t (*efi_graphics_output_protocol_query_mode)( - struct efi_graphics_output_protocol *, u32, unsigned long *, - struct efi_graphics_output_mode_info **); - struct efi_uga_draw_protocol_32 { u32 get_mode; u32 set_mode; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index da99bbb74aeb..c06945160a41 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n -lib-y := efi-stub-helper.o +lib-y := efi-stub-helper.o gop.o # include the stub's generic dependencies from lib/ when building for ARM/arm64 arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c new file mode 100644 index 000000000000..932742e4cf23 --- /dev/null +++ b/drivers/firmware/efi/libstub/gop.c @@ -0,0 +1,354 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2011 Intel Corporation; author Matt Fleming + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +#include +#include +#include +#include + +static void find_bits(unsigned long mask, u8 *pos, u8 *size) +{ + u8 first, len; + + first = 0; + len = 0; + + if (mask) { + while (!(mask & 0x1)) { + mask = mask >> 1; + first++; + } + + while (mask & 0x1) { + mask = mask >> 1; + len++; + } + } + + *pos = first; + *size = len; +} + +static void +setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, + struct efi_pixel_bitmask pixel_info, int pixel_format) +{ + if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 0; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 16; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BIT_MASK) { + find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size); + find_bits(pixel_info.green_mask, &si->green_pos, + &si->green_size); + find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size); + find_bits(pixel_info.reserved_mask, &si->rsvd_pos, + &si->rsvd_size); + si->lfb_depth = si->red_size + si->green_size + + si->blue_size + si->rsvd_size; + si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8; + } else { + si->lfb_depth = 4; + si->lfb_linelength = si->lfb_width / 2; + si->red_size = 0; + si->red_pos = 0; + si->green_size = 0; + si->green_pos = 0; + si->blue_size = 0; + si->blue_pos = 0; + si->rsvd_size = 0; + si->rsvd_pos = 0; + } +} + +static efi_status_t +__gop_query32(efi_system_table_t *sys_table_arg, + struct efi_graphics_output_protocol_32 *gop32, + struct efi_graphics_output_mode_info **info, + unsigned long *size, u64 *fb_base) +{ + struct efi_graphics_output_protocol_mode_32 *mode; + efi_graphics_output_protocol_query_mode query_mode; + efi_status_t status; + unsigned long m; + + m = gop32->mode; + mode = (struct efi_graphics_output_protocol_mode_32 *)m; + query_mode = (void *)(unsigned long)gop32->query_mode; + + status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size, + info); + if (status != EFI_SUCCESS) + return status; + + *fb_base = mode->frame_buffer_base; + return status; +} + +static efi_status_t +setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, + efi_guid_t *proto, unsigned long size, void **gop_handle) +{ + struct efi_graphics_output_protocol_32 *gop32, *first_gop; + unsigned long nr_gops; + u16 width, height; + u32 pixels_per_scan_line; + u32 ext_lfb_base; + u64 fb_base; + struct efi_pixel_bitmask pixel_info; + int pixel_format; + efi_status_t status = EFI_NOT_FOUND; + u32 *handles = (u32 *)(unsigned long)gop_handle; + int i; + + first_gop = NULL; + gop32 = NULL; + + nr_gops = size / sizeof(u32); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info = NULL; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy = NULL; + efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; + u64 current_fb_base; + + status = efi_call_early(handle_protocol, h, + proto, (void **)&gop32); + if (status != EFI_SUCCESS) + continue; + + status = efi_call_early(handle_protocol, h, + &conout_proto, &dummy); + if (status == EFI_SUCCESS) + conout_found = true; + + status = __gop_query32(sys_table_arg, gop32, &info, &size, + ¤t_fb_base); + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + /* + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; + + /* + * Once we've found a GOP supporting ConOut, + * don't bother looking any further. + */ + first_gop = gop32; + if (conout_found) + break; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto out; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + + ext_lfb_base = (u64)(unsigned long)fb_base >> 32; + if (ext_lfb_base) { + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + si->ext_lfb_base = ext_lfb_base; + } + + si->pages = 1; + + setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); + + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; +out: + return status; +} + +static efi_status_t +__gop_query64(efi_system_table_t *sys_table_arg, + struct efi_graphics_output_protocol_64 *gop64, + struct efi_graphics_output_mode_info **info, + unsigned long *size, u64 *fb_base) +{ + struct efi_graphics_output_protocol_mode_64 *mode; + efi_graphics_output_protocol_query_mode query_mode; + efi_status_t status; + unsigned long m; + + m = gop64->mode; + mode = (struct efi_graphics_output_protocol_mode_64 *)m; + query_mode = (void *)(unsigned long)gop64->query_mode; + + status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size, + info); + if (status != EFI_SUCCESS) + return status; + + *fb_base = mode->frame_buffer_base; + return status; +} + +static efi_status_t +setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, + efi_guid_t *proto, unsigned long size, void **gop_handle) +{ + struct efi_graphics_output_protocol_64 *gop64, *first_gop; + unsigned long nr_gops; + u16 width, height; + u32 pixels_per_scan_line; + u32 ext_lfb_base; + u64 fb_base; + struct efi_pixel_bitmask pixel_info; + int pixel_format; + efi_status_t status = EFI_NOT_FOUND; + u64 *handles = (u64 *)(unsigned long)gop_handle; + int i; + + first_gop = NULL; + gop64 = NULL; + + nr_gops = size / sizeof(u64); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info = NULL; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy = NULL; + efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; + u64 current_fb_base; + + status = efi_call_early(handle_protocol, h, + proto, (void **)&gop64); + if (status != EFI_SUCCESS) + continue; + + status = efi_call_early(handle_protocol, h, + &conout_proto, &dummy); + if (status == EFI_SUCCESS) + conout_found = true; + + status = __gop_query64(sys_table_arg, gop64, &info, &size, + ¤t_fb_base); + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + /* + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; + + /* + * Once we've found a GOP supporting ConOut, + * don't bother looking any further. + */ + first_gop = gop64; + if (conout_found) + break; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto out; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + + ext_lfb_base = (u64)(unsigned long)fb_base >> 32; + if (ext_lfb_base) { + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + si->ext_lfb_base = ext_lfb_base; + } + + si->pages = 1; + + setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); + + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; +out: + return status; +} + +/* + * See if we have Graphics Output Protocol + */ +efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, + struct screen_info *si, efi_guid_t *proto, + unsigned long size) +{ + efi_status_t status; + void **gop_handle = NULL; + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)&gop_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + proto, NULL, &size, gop_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + if (efi_is_64bit()) { + status = setup_gop64(sys_table_arg, si, proto, size, + gop_handle); + } else { + status = setup_gop32(sys_table_arg, si, proto, size, + gop_handle); + } + +free_handle: + efi_call_early(free_pool, gop_handle); + return status; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index c2949909339b..9203bbb28887 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -283,7 +283,8 @@ typedef struct { efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); void *__reserved; void *register_protocol_notify; - void *locate_handle; + efi_status_t (*locate_handle)(int, efi_guid_t *, void *, + unsigned long *, efi_handle_t *); void *locate_device_path; void *install_configuration_table; void *load_image; @@ -628,6 +629,10 @@ void efi_native_runtime_setup(void); EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \ 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \ + 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) + typedef struct { efi_guid_t guid; u64 table; @@ -1214,6 +1219,80 @@ struct efi_simple_text_output_protocol { void *test_string; }; +#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 +#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 +#define PIXEL_BIT_MASK 2 +#define PIXEL_BLT_ONLY 3 +#define PIXEL_FORMAT_MAX 4 + +struct efi_pixel_bitmask { + u32 red_mask; + u32 green_mask; + u32 blue_mask; + u32 reserved_mask; +}; + +struct efi_graphics_output_mode_info { + u32 version; + u32 horizontal_resolution; + u32 vertical_resolution; + int pixel_format; + struct efi_pixel_bitmask pixel_information; + u32 pixels_per_scan_line; +} __packed; + +struct efi_graphics_output_protocol_mode_32 { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode_64 { + u32 max_mode; + u32 mode; + u64 info; + u64 size_of_info; + u64 frame_buffer_base; + u64 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode { + u32 max_mode; + u32 mode; + unsigned long info; + unsigned long size_of_info; + u64 frame_buffer_base; + unsigned long frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_32 { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; +}; + +struct efi_graphics_output_protocol_64 { + u64 query_mode; + u64 set_mode; + u64 blt; + u64 mode; +}; + +struct efi_graphics_output_protocol { + unsigned long query_mode; + unsigned long set_mode; + unsigned long blt; + struct efi_graphics_output_protocol_mode *mode; +}; + +typedef efi_status_t (*efi_graphics_output_protocol_query_mode)( + struct efi_graphics_output_protocol *, u32, unsigned long *, + struct efi_graphics_output_mode_info **); + extern struct list_head efivar_sysfs_list; static inline void -- cgit v1.2.3 From 801820bee9bccb7c156af2b95c7208f428a06ae7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:53 +0100 Subject: efi/arm/libstub: Make screen_info accessible to the UEFI stub In order to hand over the framebuffer described by the GOP protocol and discovered by the UEFI stub, make struct screen_info accessible by the stub. This involves allocating a loader data buffer and passing it to the kernel proper via a UEFI Configuration Table, since the UEFI stub executes in the context of the decompressor, and cannot access the kernel's copy of struct screen_info directly. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: David Herrmann Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-22-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 3 +++ arch/arm/kernel/setup.c | 3 ++- drivers/firmware/efi/arm-init.c | 34 +++++++++++++++++++++++++++- drivers/firmware/efi/efi.c | 5 +++-- drivers/firmware/efi/libstub/arm32-stub.c | 37 +++++++++++++++++++++++++++++++ include/linux/efi.h | 11 ++++++++- 6 files changed, 88 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index dc30d89a1ed3..25f8b1162c2e 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -64,6 +64,9 @@ void efi_virtmap_unload(void); #define __efi_call_early(f, ...) f(__VA_ARGS__) #define efi_is_64bit() (false) +struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg); +void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si); + /* * A reasonable upper bound for the uncompressed kernel size is 32 MBytes, * so we will reserve that amount of memory. We have no easy way to tell what diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 2c4bea39cf22..7d4e2850910c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -883,7 +883,8 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&ioport_resource, &lp2); } -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) +#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) || \ + defined(CONFIG_EFI) struct screen_info screen_info = { .orig_video_lines = 30, .orig_video_cols = 80, diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 909d974d35d9..ac95dd8b119f 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -11,12 +11,15 @@ * */ +#define pr_fmt(fmt) "efi: " fmt + #include #include #include #include #include #include +#include #include @@ -51,6 +54,32 @@ static phys_addr_t efi_to_phys(unsigned long addr) return addr; } +static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR; + +static __initdata efi_config_table_type_t arch_tables[] = { + {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, NULL, &screen_info_table}, + {NULL_GUID, NULL, NULL} +}; + +static void __init init_screen_info(void) +{ + struct screen_info *si; + + if (screen_info_table != EFI_INVALID_TABLE_ADDR) { + si = early_memremap_ro(screen_info_table, sizeof(*si)); + if (!si) { + pr_err("Could not map screen_info config table\n"); + return; + } + screen_info = *si; + early_memunmap(si, sizeof(*si)); + + /* dummycon on ARM needs non-zero values for columns/lines */ + screen_info.orig_video_cols = 80; + screen_info.orig_video_lines = 25; + } +} + static int __init uefi_init(void) { efi_char16_t *c16; @@ -108,7 +137,8 @@ static int __init uefi_init(void) goto out; } retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, - sizeof(efi_config_table_t), NULL); + sizeof(efi_config_table_t), + arch_tables); early_memunmap(config_tables, table_size); out: @@ -223,4 +253,6 @@ void __init efi_init(void) PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); } + + init_screen_info(); } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 583e647912a5..4991371012b4 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -353,8 +353,9 @@ static __init int match_config_table(efi_guid_t *guid, for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) { if (!efi_guidcmp(*guid, table_types[i].guid)) { *(table_types[i].ptr) = table; - pr_cont(" %s=0x%lx ", - table_types[i].name, table); + if (table_types[i].name) + pr_cont(" %s=0x%lx ", + table_types[i].name, table); return 1; } } diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 6f42be4d0084..e1f0b28e1dcb 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -26,6 +26,43 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) return EFI_SUCCESS; } +static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID; + +struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg) +{ + struct screen_info *si; + efi_status_t status; + + /* + * Unlike on arm64, where we can directly fill out the screen_info + * structure from the stub, we need to allocate a buffer to hold + * its contents while we hand over to the kernel proper from the + * decompressor. + */ + status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA, + sizeof(*si), (void **)&si); + + if (status != EFI_SUCCESS) + return NULL; + + status = efi_call_early(install_configuration_table, + &screen_info_guid, si); + if (status == EFI_SUCCESS) + return si; + + efi_call_early(free_pool, si); + return NULL; +} + +void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si) +{ + if (!si) + return; + + efi_call_early(install_configuration_table, &screen_info_guid, NULL); + efi_call_early(free_pool, si); +} + efi_status_t handle_kernel_image(efi_system_table_t *sys_table, unsigned long *image_addr, unsigned long *image_size, diff --git a/include/linux/efi.h b/include/linux/efi.h index 9203bbb28887..e53458842245 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -286,7 +286,7 @@ typedef struct { efi_status_t (*locate_handle)(int, efi_guid_t *, void *, unsigned long *, efi_handle_t *); void *locate_device_path; - void *install_configuration_table; + efi_status_t (*install_configuration_table)(efi_guid_t *, void *); void *load_image; void *start_image; void *exit; @@ -633,6 +633,15 @@ void efi_native_runtime_setup(void); EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \ 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) +/* + * This GUID is used to pass to the kernel proper the struct screen_info + * structure that was populated by the stub based on the GOP protocol instance + * associated with ConOut + */ +#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID \ + EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, \ + 0xb9, 0xe, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) + typedef struct { efi_guid_t guid; u64 table; -- cgit v1.2.3 From 06f7d4a1618dbb086e738c93cd1ef416ab01027d Mon Sep 17 00:00:00 2001 From: "Compostella, Jeremy" Date: Mon, 25 Apr 2016 21:06:57 +0100 Subject: efibc: Add EFI Bootloader Control module This module installs a reboot callback, such that if reboot() is invoked with a string argument NNN, "NNN" is copied to the "LoaderEntryOneShot" EFI variable, to be read by the bootloader. If the string matches one of the boot labels defined in its configuration, the bootloader will boot once to that label. The "LoaderEntryRebootReason" EFI variable is set with the reboot reason: "reboot", "shutdown". The bootloader reads this reboot reason and takes particular action according to its policy. There are reboot implementations that do "reboot ", such as Android's reboot command and Upstart's reboot replacement, which pass the reason as an argument to the reboot syscall. There is no platform-agnostic way how those could be modified to pass the reason to the bootloader, regardless of platform or bootloader. Signed-off-by: Jeremy Compostella Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Stefan Stanacar Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-26-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/Kconfig | 15 +++++++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/efibc.c | 101 ++++++++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 4 ++ 4 files changed, 121 insertions(+) create mode 100644 drivers/firmware/efi/efibc.c (limited to 'include/linux') diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index e1670d533f97..0b0b635aa101 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -87,6 +87,21 @@ config EFI_RUNTIME_WRAPPERS config EFI_ARMSTUB bool +config EFI_BOOTLOADER_CONTROL + tristate "EFI Bootloader Control" + depends on EFI_VARS + default n + ---help--- + This module installs a reboot hook, such that if reboot() is + invoked with a string argument NNN, "NNN" is copied to the + "LoaderEntryOneShot" EFI variable, to be read by the + bootloader. If the string matches one of the boot labels + defined in its configuration, the bootloader will boot once + to that label. The "LoaderEntryRebootReason" EFI variable is + set with the reboot reason: "reboot" or "shutdown". The + bootloader reads this reboot reason and takes particular + action according to its policy. + endmenu config UEFI_CPER diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index d5be62399130..b0808080e744 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o obj-$(CONFIG_EFI_STUB) += libstub/ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o +obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o obj-$(CONFIG_ARM) += $(arm-obj-y) diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c new file mode 100644 index 000000000000..2e0c7ccd9d9e --- /dev/null +++ b/drivers/firmware/efi/efibc.c @@ -0,0 +1,101 @@ +/* + * efibc: control EFI bootloaders which obey LoaderEntryOneShot var + * Copyright (c) 2013-2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "efibc: " fmt + +#include +#include +#include + +static void efibc_str_to_str16(const char *str, efi_char16_t *str16) +{ + size_t i; + + for (i = 0; i < strlen(str); i++) + str16[i] = str[i]; + + str16[i] = '\0'; +} + +static void efibc_set_variable(const char *name, const char *value) +{ + int ret; + efi_guid_t guid = LINUX_EFI_LOADER_ENTRY_GUID; + struct efivar_entry entry; + size_t size = (strlen(value) + 1) * sizeof(efi_char16_t); + + if (size > sizeof(entry.var.Data)) + pr_err("value is too large"); + + efibc_str_to_str16(name, entry.var.VariableName); + efibc_str_to_str16(value, (efi_char16_t *)entry.var.Data); + memcpy(&entry.var.VendorGuid, &guid, sizeof(guid)); + + ret = efivar_entry_set(&entry, + EFI_VARIABLE_NON_VOLATILE + | EFI_VARIABLE_BOOTSERVICE_ACCESS + | EFI_VARIABLE_RUNTIME_ACCESS, + size, entry.var.Data, NULL); + if (ret) + pr_err("failed to set %s EFI variable: 0x%x\n", + name, ret); +} + +static int efibc_reboot_notifier_call(struct notifier_block *notifier, + unsigned long event, void *data) +{ + const char *reason = "shutdown"; + + if (event == SYS_RESTART) + reason = "reboot"; + + efibc_set_variable("LoaderEntryRebootReason", reason); + + if (!data) + return NOTIFY_DONE; + + efibc_set_variable("LoaderEntryOneShot", (char *)data); + + return NOTIFY_DONE; +} + +static struct notifier_block efibc_reboot_notifier = { + .notifier_call = efibc_reboot_notifier_call, +}; + +static int __init efibc_init(void) +{ + int ret; + + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return -ENODEV; + + ret = register_reboot_notifier(&efibc_reboot_notifier); + if (ret) + pr_err("unable to register reboot notifier\n"); + + return ret; +} +module_init(efibc_init); + +static void __exit efibc_exit(void) +{ + unregister_reboot_notifier(&efibc_reboot_notifier); +} +module_exit(efibc_exit); + +MODULE_AUTHOR("Jeremy Compostella "); +MODULE_AUTHOR("Matt Gumbel Date: Mon, 25 Apr 2016 21:06:58 +0100 Subject: efi: Move efi_status_to_err() to drivers/firmware/efi/ Move efi_status_to_err() to the architecture independent code as it's generally useful in all bits of EFI code where there is a need to convert an efi_status_t to a kernel error value. Signed-off-by: Matt Fleming Acked-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Kweh Hock Leong Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joeyli Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-27-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 33 +++++++++++++++++++++++++++++++++ drivers/firmware/efi/vars.c | 33 --------------------------------- include/linux/efi.h | 2 ++ 3 files changed, 35 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4991371012b4..05509f3aaee8 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -636,3 +636,36 @@ u64 __weak efi_mem_attributes(unsigned long phys_addr) } return 0; } + +int efi_status_to_err(efi_status_t status) +{ + int err; + + switch (status) { + case EFI_SUCCESS: + err = 0; + break; + case EFI_INVALID_PARAMETER: + err = -EINVAL; + break; + case EFI_OUT_OF_RESOURCES: + err = -ENOSPC; + break; + case EFI_DEVICE_ERROR: + err = -EIO; + break; + case EFI_WRITE_PROTECTED: + err = -EROFS; + break; + case EFI_SECURITY_VIOLATION: + err = -EACCES; + break; + case EFI_NOT_FOUND: + err = -ENOENT; + break; + default: + err = -EINVAL; + } + + return err; +} diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 34b741940494..0012331d5a3d 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -329,39 +329,6 @@ check_var_size_nonblocking(u32 attributes, unsigned long size) return fops->query_variable_store(attributes, size, true); } -static int efi_status_to_err(efi_status_t status) -{ - int err; - - switch (status) { - case EFI_SUCCESS: - err = 0; - break; - case EFI_INVALID_PARAMETER: - err = -EINVAL; - break; - case EFI_OUT_OF_RESOURCES: - err = -ENOSPC; - break; - case EFI_DEVICE_ERROR: - err = -EIO; - break; - case EFI_WRITE_PROTECTED: - err = -EROFS; - break; - case EFI_SECURITY_VIOLATION: - err = -EACCES; - break; - case EFI_NOT_FOUND: - err = -ENOENT; - break; - default: - err = -EINVAL; - } - - return err; -} - static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor, struct list_head *head) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 4db7052b2699..ca47481885c4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1080,6 +1080,8 @@ static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} #endif +extern int efi_status_to_err(efi_status_t status); + /* * Variable Attributes */ -- cgit v1.2.3 From f0133f3c5b8bb34ec4dec50c27e7a655aeee8935 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:59 +0100 Subject: efi: Add 'capsule' update support The EFI capsule mechanism allows data blobs to be passed to the EFI firmware. A common use case is performing firmware updates. This patch just introduces the main infrastructure for interacting with the firmware, and a driver that allows users to upload capsules will come in a later patch. Once a capsule has been passed to the firmware, the next reboot must be performed using the ResetSystem() EFI runtime service, which may involve overriding the reboot type specified by reboot=. This ensures the reset value returned by QueryCapsuleCapabilities() is used to reset the system, which is required for the capsule to be processed. efi_capsule_pending() is provided for this purpose. At the moment we only allow a single capsule blob to be sent to the firmware despite the fact that UpdateCapsule() takes a 'CapsuleCount' parameter. This simplifies the API and shouldn't result in any downside since it is still possible to send multiple capsules by repeatedly calling UpdateCapsule(). Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Bryan O'Donoghue Cc: Kweh Hock Leong Cc: Mark Salter Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joeyli Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-28-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/capsule.c | 300 +++++++++++++++++++++++++++++++++++++++++ drivers/firmware/efi/reboot.c | 12 +- include/linux/efi.h | 14 ++ 4 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 drivers/firmware/efi/capsule.c (limited to 'include/linux') diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index b0808080e744..fb8ad5db6547 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -10,6 +10,7 @@ KASAN_SANITIZE_runtime-wrappers.o := n obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o +obj-$(CONFIG_EFI) += capsule.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_ESRT) += esrt.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c new file mode 100644 index 000000000000..0de55944ac0b --- /dev/null +++ b/drivers/firmware/efi/capsule.c @@ -0,0 +1,300 @@ +/* + * EFI capsule support. + * + * Copyright 2013 Intel Corporation; author Matt Fleming + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + */ + +#define pr_fmt(fmt) "efi: " fmt + +#include +#include +#include +#include +#include +#include + +typedef struct { + u64 length; + u64 data; +} efi_capsule_block_desc_t; + +static bool capsule_pending; +static int efi_reset_type = -1; + +/* + * capsule_mutex serialises access to both capsule_pending and + * efi_reset_type. + */ +static DEFINE_MUTEX(capsule_mutex); + +/** + * efi_capsule_pending - has a capsule been passed to the firmware? + * @reset_type: store the type of EFI reset if capsule is pending + * + * To ensure that the registered capsule is processed correctly by the + * firmware we need to perform a specific type of reset. If a capsule is + * pending return the reset type in @reset_type. + * + * This function will race with callers of efi_capsule_update(), for + * example, calling this function while somebody else is in + * efi_capsule_update() but hasn't reached efi_capsue_update_locked() + * will miss the updates to capsule_pending and efi_reset_type after + * efi_capsule_update_locked() completes. + * + * A non-racy use is from platform reboot code because we use + * system_state to ensure no capsules can be sent to the firmware once + * we're at SYSTEM_RESTART. See efi_capsule_update_locked(). + */ +bool efi_capsule_pending(int *reset_type) +{ + bool rv = false; + + mutex_lock(&capsule_mutex); + if (!capsule_pending) + goto out; + + if (reset_type) + *reset_type = efi_reset_type; + rv = true; +out: + mutex_unlock(&capsule_mutex); + return rv; +} + +/* + * Whitelist of EFI capsule flags that we support. + * + * We do not handle EFI_CAPSULE_INITIATE_RESET because that would + * require us to prepare the kernel for reboot. Refuse to load any + * capsules with that flag and any other flags that we do not know how + * to handle. + */ +#define EFI_CAPSULE_SUPPORTED_FLAG_MASK \ + (EFI_CAPSULE_PERSIST_ACROSS_RESET | EFI_CAPSULE_POPULATE_SYSTEM_TABLE) + +/** + * efi_capsule_supported - does the firmware support the capsule? + * @guid: vendor guid of capsule + * @flags: capsule flags + * @size: size of capsule data + * @reset: the reset type required for this capsule + * + * Check whether a capsule with @flags is supported by the firmware + * and that @size doesn't exceed the maximum size for a capsule. + * + * No attempt is made to check @reset against the reset type required + * by any pending capsules because of the races involved. + */ +int efi_capsule_supported(efi_guid_t guid, u32 flags, size_t size, int *reset) +{ + efi_capsule_header_t *capsule; + efi_status_t status; + u64 max_size; + int rv = 0; + + if (flags & ~EFI_CAPSULE_SUPPORTED_FLAG_MASK) + return -EINVAL; + + capsule = kmalloc(sizeof(*capsule), GFP_KERNEL); + if (!capsule) + return -ENOMEM; + + capsule->headersize = capsule->imagesize = sizeof(*capsule); + memcpy(&capsule->guid, &guid, sizeof(efi_guid_t)); + capsule->flags = flags; + + status = efi.query_capsule_caps(&capsule, 1, &max_size, reset); + if (status != EFI_SUCCESS) { + rv = efi_status_to_err(status); + goto out; + } + + if (size > max_size) + rv = -ENOSPC; +out: + kfree(capsule); + return rv; +} +EXPORT_SYMBOL_GPL(efi_capsule_supported); + +/* + * Every scatter gather list (block descriptor) page must end with a + * continuation pointer. The last continuation pointer of the last + * page must be zero to mark the end of the chain. + */ +#define SGLIST_PER_PAGE ((PAGE_SIZE / sizeof(efi_capsule_block_desc_t)) - 1) + +/* + * How many scatter gather list (block descriptor) pages do we need + * to map @count pages? + */ +static inline unsigned int sg_pages_num(unsigned int count) +{ + return DIV_ROUND_UP(count, SGLIST_PER_PAGE); +} + +/** + * efi_capsule_update_locked - pass a single capsule to the firmware + * @capsule: capsule to send to the firmware + * @sg_pages: array of scatter gather (block descriptor) pages + * @reset: the reset type required for @capsule + * + * Since this function must be called under capsule_mutex check + * whether efi_reset_type will conflict with @reset, and atomically + * set it and capsule_pending if a capsule was successfully sent to + * the firmware. + * + * We also check to see if the system is about to restart, and if so, + * abort. This avoids races between efi_capsule_update() and + * efi_capsule_pending(). + */ +static int +efi_capsule_update_locked(efi_capsule_header_t *capsule, + struct page **sg_pages, int reset) +{ + efi_physical_addr_t sglist_phys; + efi_status_t status; + + lockdep_assert_held(&capsule_mutex); + + /* + * If someone has already registered a capsule that requires a + * different reset type, we're out of luck and must abort. + */ + if (efi_reset_type >= 0 && efi_reset_type != reset) { + pr_err("Conflicting capsule reset type %d (%d).\n", + reset, efi_reset_type); + return -EINVAL; + } + + /* + * If the system is getting ready to restart it may have + * called efi_capsule_pending() to make decisions (such as + * whether to force an EFI reboot), and we're racing against + * that call. Abort in that case. + */ + if (unlikely(system_state == SYSTEM_RESTART)) { + pr_warn("Capsule update raced with reboot, aborting.\n"); + return -EINVAL; + } + + sglist_phys = page_to_phys(sg_pages[0]); + + status = efi.update_capsule(&capsule, 1, sglist_phys); + if (status == EFI_SUCCESS) { + capsule_pending = true; + efi_reset_type = reset; + } + + return efi_status_to_err(status); +} + +/** + * efi_capsule_update - send a capsule to the firmware + * @capsule: capsule to send to firmware + * @pages: an array of capsule data pages + * + * Build a scatter gather list with EFI capsule block descriptors to + * map the capsule described by @capsule with its data in @pages and + * send it to the firmware via the UpdateCapsule() runtime service. + * + * @capsule must be a virtual mapping of the first page in @pages + * (@pages[0]) in the kernel address space. That is, a + * capsule_header_t that describes the entire contents of the capsule + * must be at the start of the first data page. + * + * Even though this function will validate that the firmware supports + * the capsule guid, users will likely want to check that + * efi_capsule_supported() returns true before calling this function + * because it makes it easier to print helpful error messages. + * + * If the capsule is successfully submitted to the firmware, any + * subsequent calls to efi_capsule_pending() will return true. @pages + * must not be released or modified if this function returns + * successfully. + * + * Callers must be prepared for this function to fail, which can + * happen if we raced with system reboot or if there is already a + * pending capsule that has a reset type that conflicts with the one + * required by @capsule. Do NOT use efi_capsule_pending() to detect + * this conflict since that would be racy. Instead, submit the capsule + * to efi_capsule_update() and check the return value. + * + * Return 0 on success, a converted EFI status code on failure. + */ +int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages) +{ + u32 imagesize = capsule->imagesize; + efi_guid_t guid = capsule->guid; + unsigned int count, sg_count; + u32 flags = capsule->flags; + struct page **sg_pages; + int rv, reset_type; + int i, j; + + rv = efi_capsule_supported(guid, flags, imagesize, &reset_type); + if (rv) + return rv; + + count = DIV_ROUND_UP(imagesize, PAGE_SIZE); + sg_count = sg_pages_num(count); + + sg_pages = kzalloc(sg_count * sizeof(*sg_pages), GFP_KERNEL); + if (!sg_pages) + return -ENOMEM; + + for (i = 0; i < sg_count; i++) { + sg_pages[i] = alloc_page(GFP_KERNEL); + if (!sg_pages[i]) { + rv = -ENOMEM; + goto out; + } + } + + for (i = 0; i < sg_count; i++) { + efi_capsule_block_desc_t *sglist; + + sglist = kmap(sg_pages[i]); + if (!sglist) { + rv = -ENOMEM; + goto out; + } + + for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) { + u64 sz = min_t(u64, imagesize, PAGE_SIZE); + + sglist[j].length = sz; + sglist[j].data = page_to_phys(*pages++); + + imagesize -= sz; + count--; + } + + /* Continuation pointer */ + sglist[j].length = 0; + + if (i + 1 == sg_count) + sglist[j].data = 0; + else + sglist[j].data = page_to_phys(sg_pages[i + 1]); + + kunmap(sg_pages[i]); + } + + mutex_lock(&capsule_mutex); + rv = efi_capsule_update_locked(capsule, sg_pages, reset_type); + mutex_unlock(&capsule_mutex); + +out: + for (i = 0; rv && i < sg_count; i++) { + if (sg_pages[i]) + __free_page(sg_pages[i]); + } + + kfree(sg_pages); + return rv; +} +EXPORT_SYMBOL_GPL(efi_capsule_update); diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c index 9c59d1c795d1..62ead9b9d871 100644 --- a/drivers/firmware/efi/reboot.c +++ b/drivers/firmware/efi/reboot.c @@ -9,7 +9,8 @@ int efi_reboot_quirk_mode = -1; void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) { - int efi_mode; + const char *str[] = { "cold", "warm", "shutdown", "platform" }; + int efi_mode, cap_reset_mode; if (!efi_enabled(EFI_RUNTIME_SERVICES)) return; @@ -30,6 +31,15 @@ void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) if (efi_reboot_quirk_mode != -1) efi_mode = efi_reboot_quirk_mode; + if (efi_capsule_pending(&cap_reset_mode)) { + if (efi_mode != cap_reset_mode) + printk(KERN_CRIT "efi: %s reset requested but pending " + "capsule update requires %s reset... Performing " + "%s reset.\n", str[efi_mode], str[cap_reset_mode], + str[cap_reset_mode]); + efi_mode = cap_reset_mode; + } + efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL); } diff --git a/include/linux/efi.h b/include/linux/efi.h index ca47481885c4..a3b4c1ec38c0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -124,6 +124,13 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; +/* + * EFI capsule flags + */ +#define EFI_CAPSULE_PERSIST_ACROSS_RESET 0x00010000 +#define EFI_CAPSULE_POPULATE_SYSTEM_TABLE 0x00020000 +#define EFI_CAPSULE_INITIATE_RESET 0x00040000 + /* * Allocation types for calls to boottime->allocate_pages. */ @@ -1370,6 +1377,13 @@ int efivars_sysfs_init(void); #define EFIVARS_DATA_SIZE_MAX 1024 #endif /* CONFIG_EFI_VARS */ +extern bool efi_capsule_pending(int *reset_type); + +extern int efi_capsule_supported(efi_guid_t guid, u32 flags, + size_t size, int *reset); + +extern int efi_capsule_update(efi_capsule_header_t *capsule, + struct page **pages); #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -- cgit v1.2.3 From 87615a34d561ef59bd0cffc73256a21220dfdffd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:07:00 +0100 Subject: x86/efi: Force EFI reboot to process pending capsules If an EFI capsule has been sent to the firmware we must match the type of EFI reset against that required by the capsule to ensure it is processed correctly. Force an EFI reboot if a capsule is pending for the next reset. Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Kweh Hock Leong Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joeyli Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-29-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 9 +++++++++ include/linux/efi.h | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index ab0adc0fa5db..a9b31eb815f2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -535,6 +535,15 @@ static void native_machine_emergency_restart(void) mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; *((unsigned short *)__va(0x472)) = mode; + /* + * If an EFI capsule has been registered with the firmware then + * override the reboot= parameter. + */ + if (efi_capsule_pending(NULL)) { + pr_info("EFI capsule is pending, forcing EFI reboot.\n"); + reboot_type = BOOT_EFI; + } + for (;;) { /* Could also try the reset bit in the Hammer NB */ switch (reboot_type) { diff --git a/include/linux/efi.h b/include/linux/efi.h index a3b4c1ec38c0..aa36fb8bea4b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1085,6 +1085,12 @@ static inline bool efi_enabled(int feature) } static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} + +static inline bool +efi_capsule_pending(int *reset_type) +{ + return false; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From f98db6013c557c216da5038d9c52045be55cd039 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 09:39:06 -0700 Subject: sched/core: Add switch_mm_irqs_off() and use it in the scheduler By default, this is the same thing as switch_mm(). x86 will override it as an optimization. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/df401df47bdd6be3e389c6f1e3f5310d70e81b2c.1461688545.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mmu_context.h | 7 +++++++ kernel/sched/core.c | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index 70fffeba7495..a4441784503b 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -1,9 +1,16 @@ #ifndef _LINUX_MMU_CONTEXT_H #define _LINUX_MMU_CONTEXT_H +#include + struct mm_struct; void use_mm(struct mm_struct *mm); void unuse_mm(struct mm_struct *mm); +/* Architectures that care about IRQ state in switch_mm can override this. */ +#ifndef switch_mm_irqs_off +# define switch_mm_irqs_off switch_mm +#endif + #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9d84d6004745..adcafda7ffc8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -2733,7 +2733,7 @@ context_switch(struct rq *rq, struct task_struct *prev, atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else - switch_mm(oldmm, mm, next); + switch_mm_irqs_off(oldmm, mm, next); if (!prev->mm) { prev->active_mm = NULL; @@ -5274,7 +5274,7 @@ void idle_task_exit(void) BUG_ON(cpu_online(smp_processor_id())); if (mm != &init_mm) { - switch_mm(mm, &init_mm, current); + switch_mm_irqs_off(mm, &init_mm, current); finish_arch_post_lock_switch(); } mmdrop(mm); -- cgit v1.2.3 From 3358d2d9f47af86bdd71edb24b361f72a54ec04e Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 18 Jun 2015 17:28:40 -0400 Subject: clk: tegra: Add interface to enable hardware control of SATA/XUSB PLLs On Tegra210, hardware control of the SATA and XUSB pad PLLs must be done during the UPHY enable sequence rather than the PLLE enable sequence. Export functions to do this so that hardware control can be enabled from the XUSB padctl driver. Signed-off-by: Andrew Bresticker Signed-off-by: Rhyland Klein Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-tegra210.c | 58 ++++++++++++++++++++++++++++++++++++++++ include/linux/clk/tegra.h | 5 ++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index 637041fd53ad..3d0edee1f9fe 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -175,6 +175,19 @@ #define UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN BIT(14) #define UTMIP_PLL_CFG1_FORCE_PLL_ACTIVE_POWERDOWN BIT(12) +#define SATA_PLL_CFG0 0x490 +#define SATA_PLL_CFG0_PADPLL_RESET_SWCTL BIT(0) +#define SATA_PLL_CFG0_PADPLL_USE_LOCKDET BIT(2) +#define SATA_PLL_CFG0_PADPLL_SLEEP_IDDQ BIT(13) +#define SATA_PLL_CFG0_SEQ_ENABLE BIT(24) + +#define XUSBIO_PLL_CFG0 0x51c +#define XUSBIO_PLL_CFG0_PADPLL_RESET_SWCTL BIT(0) +#define XUSBIO_PLL_CFG0_CLK_ENABLE_SWCTL BIT(2) +#define XUSBIO_PLL_CFG0_PADPLL_USE_LOCKDET BIT(6) +#define XUSBIO_PLL_CFG0_PADPLL_SLEEP_IDDQ BIT(13) +#define XUSBIO_PLL_CFG0_SEQ_ENABLE BIT(24) + #define UTMIPLL_HW_PWRDN_CFG0 0x52c #define UTMIPLL_HW_PWRDN_CFG0_UTMIPLL_LOCK BIT(31) #define UTMIPLL_HW_PWRDN_CFG0_SEQ_START_STATE BIT(25) @@ -416,6 +429,51 @@ static const char *mux_pllmcp_clkm[] = { #define PLLU_MISC0_WRITE_MASK 0xbfffffff #define PLLU_MISC1_WRITE_MASK 0x00000007 +void tegra210_xusb_pll_hw_control_enable(void) +{ + u32 val; + + val = readl_relaxed(clk_base + XUSBIO_PLL_CFG0); + val &= ~(XUSBIO_PLL_CFG0_CLK_ENABLE_SWCTL | + XUSBIO_PLL_CFG0_PADPLL_RESET_SWCTL); + val |= XUSBIO_PLL_CFG0_PADPLL_USE_LOCKDET | + XUSBIO_PLL_CFG0_PADPLL_SLEEP_IDDQ; + writel_relaxed(val, clk_base + XUSBIO_PLL_CFG0); +} +EXPORT_SYMBOL_GPL(tegra210_xusb_pll_hw_control_enable); + +void tegra210_xusb_pll_hw_sequence_start(void) +{ + u32 val; + + val = readl_relaxed(clk_base + XUSBIO_PLL_CFG0); + val |= XUSBIO_PLL_CFG0_SEQ_ENABLE; + writel_relaxed(val, clk_base + XUSBIO_PLL_CFG0); +} +EXPORT_SYMBOL_GPL(tegra210_xusb_pll_hw_sequence_start); + +void tegra210_sata_pll_hw_control_enable(void) +{ + u32 val; + + val = readl_relaxed(clk_base + SATA_PLL_CFG0); + val &= ~SATA_PLL_CFG0_PADPLL_RESET_SWCTL; + val |= SATA_PLL_CFG0_PADPLL_USE_LOCKDET | + SATA_PLL_CFG0_PADPLL_SLEEP_IDDQ; + writel_relaxed(val, clk_base + SATA_PLL_CFG0); +} +EXPORT_SYMBOL_GPL(tegra210_sata_pll_hw_control_enable); + +void tegra210_sata_pll_hw_sequence_start(void) +{ + u32 val; + + val = readl_relaxed(clk_base + SATA_PLL_CFG0); + val |= SATA_PLL_CFG0_SEQ_ENABLE; + writel_relaxed(val, clk_base + SATA_PLL_CFG0); +} +EXPORT_SYMBOL_GPL(tegra210_sata_pll_hw_sequence_start); + static inline void _pll_misc_chk_default(void __iomem *base, struct tegra_clk_pll_params *params, u8 misc_num, u32 default_val, u32 mask) diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 57bf7aab4516..7007a5f48080 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -121,4 +121,9 @@ static inline void tegra_cpu_clock_resume(void) } #endif +extern void tegra210_xusb_pll_hw_control_enable(void); +extern void tegra210_xusb_pll_hw_sequence_start(void); +extern void tegra210_sata_pll_hw_control_enable(void); +extern void tegra210_sata_pll_hw_sequence_start(void); + #endif /* __LINUX_CLK_TEGRA_H_ */ -- cgit v1.2.3 From d708b384c06dddeb35d46a6127c08a7403fa2faf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:21 +0530 Subject: PM / OPP: -ENOSYS is applicable only to syscalls Some of the routines have used -ENOSYS for the cases where the functionality isn't implemented in the kernel. But ENOSYS is supposed to be used only for syscalls. Replace that with -ENOTSUPP, which specifically means that the operation isn't supported. While at it, replace exiting -EINVAL errors for similar cases to -ENOTSUPP. Signed-off-by: Viresh Kumar Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5b6ad31403a5..a2df8f6fcc25 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -110,25 +110,25 @@ static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq) @@ -148,40 +148,40 @@ static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq) static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( struct device *dev) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_put_supported_hw(struct device *dev) {} static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_put_prop_name(struct device *dev) {} static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_put_regulator(struct device *dev) {} static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { - return -EINVAL; + return -ENOTSUPP; } static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { - return -ENOSYS; + return -ENOTSUPP; } #endif /* CONFIG_PM_OPP */ @@ -195,7 +195,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask #else static inline int dev_pm_opp_of_add_table(struct device *dev) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_of_remove_table(struct device *dev) @@ -204,7 +204,7 @@ static inline void dev_pm_opp_of_remove_table(struct device *dev) static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask) { - return -ENOSYS; + return -ENOTSUPP; } static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) @@ -213,7 +213,7 @@ static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { - return -ENOSYS; + return -ENOTSUPP; } #endif -- cgit v1.2.3 From dde370b23c1787a9c723ac049d56a3014937f889 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:22 +0530 Subject: PM / OPP: Mark cpumask as const in dev_pm_opp_set_sharing_cpus() dev_pm_opp_set_sharing_cpus() isn't supposed to update the cpumask passed as its parameter, and so it should always have been marked 'const'. Do it now. Signed-off-by: Viresh Kumar Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/cpu.c | 3 ++- include/linux/pm_opp.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 55cbf9bd8707..5469e7730ff2 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -287,7 +287,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. */ -int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, + const cpumask_var_t cpumask) { struct opp_device *opp_dev; struct opp_table *opp_table; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a2df8f6fcc25..0c08ed3836b1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -65,7 +65,7 @@ void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); -int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -179,7 +179,7 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f return -ENOTSUPP; } -static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask) { return -ENOTSUPP; } -- cgit v1.2.3 From 6f707daa3833761110a03478cba5cc4b708ec77d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:23 +0530 Subject: PM / OPP: Add dev_pm_opp_get_sharing_cpus() OPP core allows a platform to mark OPP table as shared, when the platform isn't using operating-points-v2 bindings. And, so there should be a non DT way of finding out if the OPP table is shared or not. This patch adds dev_pm_opp_get_sharing_cpus(), which first tries to get OPP sharing information from the opp-table (in case it is already marked as shared), otherwise it uses the existing DT way of finding sharing information. Signed-off-by: Viresh Kumar Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/cpu.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 ++++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 5469e7730ff2..3428380dfca7 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -330,3 +330,48 @@ unlock: return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); + +/** + * dev_pm_opp_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with @cpu_dev + * @cpu_dev: CPU device for which we do this operation + * @cpumask: cpumask to update with information of sharing CPUs + * + * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev. + * + * Returns -ENODEV if OPP table isn't already present. + * + * Locking: The internal opp_table and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +{ + struct opp_device *opp_dev; + struct opp_table *opp_table; + int ret = 0; + + mutex_lock(&opp_table_lock); + + opp_table = _find_opp_table(cpu_dev); + if (IS_ERR(opp_table)) { + ret = PTR_ERR(opp_table); + goto unlock; + } + + cpumask_clear(cpumask); + + if (opp_table->shared_opp) { + list_for_each_entry(opp_dev, &opp_table->dev_list, node) + cpumask_set_cpu(opp_dev->dev->id, cpumask); + } else { + cpumask_set_cpu(cpu_dev->id, cpumask); + } + +unlock: + mutex_unlock(&opp_table_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_sharing_cpus); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0c08ed3836b1..15f554443b59 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -66,6 +66,7 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask); +int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -184,6 +185,11 @@ static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpum return -ENOTSUPP; } +static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +{ + return -EINVAL; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From eb96924acddc709db58221c210ca05cd9effb1df Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:26 +0530 Subject: cpufreq: dt: Kill platform-data There are no more users of platform-data for cpufreq-dt driver, get rid of it. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 6 +----- include/linux/cpufreq-dt.h | 24 ------------------------ 2 files changed, 1 insertion(+), 29 deletions(-) delete mode 100644 include/linux/cpufreq-dt.h (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index aca9bec00f91..3957de801ae8 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -217,10 +216,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } if (fallback) { - struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); - - if (!pd || !pd->independent_clocks) - cpumask_setall(policy->cpus); + cpumask_setall(policy->cpus); /* * OPP tables are initialized only for policy->cpu, do it for diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h deleted file mode 100644 index a87335a1660c..000000000000 --- a/include/linux/cpufreq-dt.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2014 Marvell - * Thomas Petazzoni - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __CPUFREQ_DT_H__ -#define __CPUFREQ_DT_H__ - -#include - -struct cpufreq_dt_platform_data { - /* - * True when each CPU has its own clock to control its - * frequency, false when all CPUs are controlled by a single - * clock. - */ - bool independent_clocks; -}; - -#endif /* __CPUFREQ_DT_H__ */ -- cgit v1.2.3 From feb26ac31a2a5cb88d86680d9a94916a6343e9e6 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Mon, 25 Apr 2016 13:48:38 +0100 Subject: usb: core: hub: hub_port_init lock controller instead of bus The XHCI controller presents two USB buses to the system - one for USB2 and one for USB3. The hub init code (hub_port_init) is reentrant but only locks one bus per thread, leading to a race condition failure when two threads attempt to simultaneously initialise a USB2 and USB3 device: [ 8.034843] xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command [ 13.183701] usb 3-3: device descriptor read/all, error -110 On a test system this failure occurred on 6% of all boots. The call traces at the point of failure are: Call Trace: [] schedule+0x37/0x90 [] usb_kill_urb+0x8d/0xd0 [] ? wake_up_atomic_t+0x30/0x30 [] usb_start_wait_urb+0xbe/0x150 [] usb_control_msg+0xbc/0xf0 [] hub_port_init+0x51e/0xb70 [] hub_event+0x817/0x1570 [] process_one_work+0x1ff/0x620 [] ? process_one_work+0x15f/0x620 [] worker_thread+0x64/0x4b0 [] ? rescuer_thread+0x390/0x390 [] kthread+0x105/0x120 [] ? kthread_create_on_node+0x200/0x200 [] ret_from_fork+0x3f/0x70 [] ? kthread_create_on_node+0x200/0x200 Call Trace: [] xhci_setup_device+0x53d/0xa40 [] xhci_address_device+0xe/0x10 [] hub_port_init+0x1bf/0xb70 [] ? trace_hardirqs_on+0xd/0x10 [] hub_event+0x817/0x1570 [] process_one_work+0x1ff/0x620 [] ? process_one_work+0x15f/0x620 [] worker_thread+0x64/0x4b0 [] ? rescuer_thread+0x390/0x390 [] kthread+0x105/0x120 [] ? kthread_create_on_node+0x200/0x200 [] ret_from_fork+0x3f/0x70 [] ? kthread_create_on_node+0x200/0x200 Which results from the two call chains: hub_port_init usb_get_device_descriptor usb_get_descriptor usb_control_msg usb_internal_control_msg usb_start_wait_urb usb_submit_urb / wait_for_completion_timeout / usb_kill_urb hub_port_init hub_set_address xhci_address_device xhci_setup_device Mathias Nyman explains the current behaviour violates the XHCI spec: hub_port_reset() will end up moving the corresponding xhci device slot to default state. As hub_port_reset() is called several times in hub_port_init() it sounds reasonable that we could end up with two threads having their xhci device slots in default state at the same time, which according to xhci 4.5.3 specs still is a big no no: "Note: Software shall not transition more than one Device Slot to the Default State at a time" So both threads fail at their next task after this. One fails to read the descriptor, and the other fails addressing the device. Fix this in hub_port_init by locking the USB controller (instead of an individual bus) to prevent simultaneous initialisation of both buses. Fixes: 638139eb95d2 ("usb: hub: allow to process more usb hub events in parallel") Link: https://lkml.org/lkml/2016/2/8/312 Link: https://lkml.org/lkml/2016/2/4/748 Signed-off-by: Chris Bainbridge Cc: stable Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 15 +++++++++++++-- drivers/usb/core/hub.c | 8 ++++---- include/linux/usb.h | 3 +-- include/linux/usb/hcd.h | 1 + 4 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 2ca2cef7f681..980fc5774151 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -994,7 +994,7 @@ static void usb_bus_init (struct usb_bus *bus) bus->bandwidth_allocated = 0; bus->bandwidth_int_reqs = 0; bus->bandwidth_isoc_reqs = 0; - mutex_init(&bus->usb_address0_mutex); + mutex_init(&bus->devnum_next_mutex); } /*-------------------------------------------------------------------------*/ @@ -2521,6 +2521,14 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, return NULL; } if (primary_hcd == NULL) { + hcd->address0_mutex = kmalloc(sizeof(*hcd->address0_mutex), + GFP_KERNEL); + if (!hcd->address0_mutex) { + kfree(hcd); + dev_dbg(dev, "hcd address0 mutex alloc failed\n"); + return NULL; + } + mutex_init(hcd->address0_mutex); hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex), GFP_KERNEL); if (!hcd->bandwidth_mutex) { @@ -2532,6 +2540,7 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, dev_set_drvdata(dev, hcd); } else { mutex_lock(&usb_port_peer_mutex); + hcd->address0_mutex = primary_hcd->address0_mutex; hcd->bandwidth_mutex = primary_hcd->bandwidth_mutex; hcd->primary_hcd = primary_hcd; primary_hcd->primary_hcd = primary_hcd; @@ -2598,8 +2607,10 @@ static void hcd_release(struct kref *kref) struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref); mutex_lock(&usb_port_peer_mutex); - if (usb_hcd_is_primary_hcd(hcd)) + if (usb_hcd_is_primary_hcd(hcd)) { + kfree(hcd->address0_mutex); kfree(hcd->bandwidth_mutex); + } if (hcd->shared_hcd) { struct usb_hcd *peer = hcd->shared_hcd; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c2270d8fac12..bee13517676f 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2082,7 +2082,7 @@ static void choose_devnum(struct usb_device *udev) struct usb_bus *bus = udev->bus; /* be safe when more hub events are proceed in parallel */ - mutex_lock(&bus->usb_address0_mutex); + mutex_lock(&bus->devnum_next_mutex); if (udev->wusb) { devnum = udev->portnum + 1; BUG_ON(test_bit(devnum, bus->devmap.devicemap)); @@ -2100,7 +2100,7 @@ static void choose_devnum(struct usb_device *udev) set_bit(devnum, bus->devmap.devicemap); udev->devnum = devnum; } - mutex_unlock(&bus->usb_address0_mutex); + mutex_unlock(&bus->devnum_next_mutex); } static void release_devnum(struct usb_device *udev) @@ -4366,7 +4366,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; - mutex_lock(&hdev->bus->usb_address0_mutex); + mutex_lock(hcd->address0_mutex); /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ @@ -4652,7 +4652,7 @@ fail: hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } - mutex_unlock(&hdev->bus->usb_address0_mutex); + mutex_unlock(hcd->address0_mutex); return retval; } diff --git a/include/linux/usb.h b/include/linux/usb.h index 7824f4557d50..01b6c61cf9bb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -374,13 +374,12 @@ struct usb_bus { int devnum_next; /* Next open device number in * round-robin allocation */ + struct mutex devnum_next_mutex; /* devnum_next mutex */ struct usb_devmap devmap; /* device address allocation map */ struct usb_device *root_hub; /* Root hub */ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */ - struct mutex usb_address0_mutex; /* unaddressed device mutex */ - int bandwidth_allocated; /* on this bus: how much of the time * reserved for periodic (intr/iso) * requests is used, on average? diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index b98f831dcda3..66fc13705ab7 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -181,6 +181,7 @@ struct usb_hcd { * bandwidth_mutex should be dropped after a successful control message * to the device, or resetting the bandwidth after a failed attempt. */ + struct mutex *address0_mutex; struct mutex *bandwidth_mutex; struct usb_hcd *shared_hcd; struct usb_hcd *primary_hcd; -- cgit v1.2.3 From 7b7483409f09c15f30ac43242ead1ab3061e1f59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 25 Apr 2016 15:13:17 -0300 Subject: net: fix net_gso_ok for new GSO types. Fix casting in net_gso_ok. Otherwise the shift on gso_type << NETIF_F_GSO_SHIFT may hit the 32th bit and make it look like a INT_MIN, which is then promoted from signed to uint64 which is 0xffffffff80000000, resulting in wrong behavior when it is and'ed with the feature itself, as in: This test app: #include #include int main(int argc, char **argv) { uint64_t feature1; uint64_t feature2; int gso_type = 1 << 15; feature1 = gso_type << 16; feature2 = (uint64_t)gso_type << 16; printf("%lx %lx\n", feature1, feature2); return 0; } Gives: ffffffff80000000 80000000 So that this: return (features & feature) == feature; Actually works on more bits than expected and invalid ones. Fix is to promote it earlier. Issue noted while rebasing SCTP GSO patch but posting separetely as someone else may experience this meanwhile. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8395308a2445..b3c46b019ac1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4004,7 +4004,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); -- cgit v1.2.3 From 0a2cf20c3fb62ad4717276b5303bf831f7b29d54 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 27 Apr 2016 23:39:01 -0400 Subject: tcp: remove SKBTX_ACK_TSTAMP since it is redundant The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when the timestamp of the TCP acknowledgement should be reported on error queue. Since accessing skb_shinfo is likely to incur a cache-line miss at the time of receiving the ack, the txstamp_ack bit was added in tcp_skb_cb, which is set iff the SKBTX_ACK_TSTAMP flag is set for an skb. This makes SKBTX_ACK_TSTAMP flag redundant. Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit everywhere. Note that this frees one bit in shinfo->tx_flags. Signed-off-by: Soheil Hassas Yeganeh Acked-by: Martin KaFai Lau Suggested-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +----- net/ipv4/tcp.c | 5 +++-- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 17 +++++++++++------ net/socket.c | 3 --- 5 files changed, 16 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a1ce63979ad8..c84a5a1078c5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -382,14 +382,10 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, - - /* generate software timestamp on peer data acknowledgment */ - SKBTX_ACK_TSTAMP = 1 << 7, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP | \ - SKBTX_ACK_TSTAMP) + SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 53890a730ff4..91993782a947 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -435,9 +435,10 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) + tcb->txstamp_ack = 1; + if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; - tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d5239c283cb..70c370b93762 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3087,8 +3087,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, return; shinfo = skb_shinfo(skb); - if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - !before(shinfo->tskey, prior_snd_una) && + if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1b2045ac3a9..b3a31b4df57c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1111,11 +1111,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de tcp_verify_left_out(tp); } +static bool tcp_has_tx_tstamp(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->txstamp_ack || + (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); +} + static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) { struct skb_shared_info *shinfo = skb_shinfo(skb); - if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && + if (unlikely(tcp_has_tx_tstamp(skb)) && !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { struct skb_shared_info *shinfo2 = skb_shinfo(skb2); u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; @@ -2446,13 +2452,12 @@ u32 __tcp_select_window(struct sock *sk) void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb) { - const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); - u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; - - if (unlikely(tsflags)) { + if (unlikely(tcp_has_tx_tstamp(next_skb))) { + const struct skb_shared_info *next_shinfo = + skb_shinfo(next_skb); struct skb_shared_info *shinfo = skb_shinfo(skb); - shinfo->tx_flags |= tsflags; + shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; shinfo->tskey = next_shinfo->tskey; TCP_SKB_CB(skb)->txstamp_ack |= TCP_SKB_CB(next_skb)->txstamp_ack; diff --git a/net/socket.c b/net/socket.c index 5dbb0bbe12a7..7789d79609dd 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (tsflags & SOF_TIMESTAMPING_TX_ACK) - flags |= SKBTX_ACK_TSTAMP; - *tx_flags = flags; } EXPORT_SYMBOL(__sock_tx_timestamp); -- cgit v1.2.3 From 8453c5cafd32c4d6bd13ec4a62d4b639f4edb222 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 28 Apr 2016 08:21:03 -0700 Subject: ARM: OMAP2+: Add more functions to pwm pdata for ir-rx51 Before we start removing omap3 legacy booting support, let's make n900 DT booting behave the same way for ir-rx51 as the legacy booting does. For now, we need to pass pdata to the ir-rx51 driver. This means that the n900 tree can move to using DT based booting without having to carry all the legacy platform data with it when it gets dropped from the mainline tree. Note that the ir-rx51 driver is currently disabled because of the dependency to !ARCH_MULTIPLATFORM. This will get sorted out later with the help of drivers/pwm/pwm-omap-dmtimer.c. But first we need to add chained IRQ support to dmtimer code to avoid introducing new custom frameworks. So let's just pass the necessary dmtimer functions to ir-rx51 so we can get it working in the following patch. Cc: Neil Armstrong Tested-by: Ivaylo Dimitrov Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-rx51-peripherals.c | 35 ++++++++++++++++++++++++-- arch/arm/mach-omap2/pdata-quirks.c | 33 +++++++++++++++++++++++- include/linux/platform_data/media/ir-rx51.h | 1 + include/linux/platform_data/pwm_omap_dmtimer.h | 21 ++++++++++++++++ 4 files changed, 87 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index da174c0d603b..9a7073949d1d 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -30,6 +30,8 @@ #include #include +#include + #include #include "common.h" @@ -47,9 +49,8 @@ #include